Introduction

Here goes introduction text

Theory?

Here goes theory. Do we need theory?

Data

Basic description of the data.

Creating document feature matrix.

#Create the document-feature matrix (DFM) from the tokens object `tok`:
#features are lowercased, SMART stopwords are removed, and the rest are stemmed.
#Note: the result is stored in a variable called `dfm`, the same name as the function.
dfm <- dfm(tok, 
           tolower = TRUE,
           remove=stopwords("SMART"),
           stem=TRUE, 
           verbose = TRUE)
Creating a dfm from a tokens object ...
   ... lowercasing
   ... found 7,701 documents, 55,199 features
   ... dfm_select removed 541 features in 0 documents, padding 0s for 0 features and 0 documents.
   ... stemming features (English), trimmed 21554 feature variants
   ... created a 7,701 x 33,104 sparse dfm
   ... complete. 
Elapsed time: 1.51 seconds.
#Show the 100 most frequent features in the DFM
topfeatures(dfm, n = 100)
   nation   countri    intern      unit   develop      peac     world     state     peopl 
   159615    128836    126205    124521    112314     95337     92927     87853     86328 
    secur    econom   general    govern     organ      year    effort     human   assembl 
    64208     56855     54784     49899     48573     47104     43406     42674     41423 
  support communiti   continu    region   problem     polit     right      time    africa 
    41419     41356     40713     39693     38024     36489     36175     31795     31709 
  session   council    import    member  conflict    achiev      work   resolut     south 
    30971     30319     29908     29636     29197     28715     27989     27381     27046 
     hope    global    situat      make    presid    effect       war     relat     order 
    26830     26815     26404     25940     25865     25113     24888     24845     24817 
   republ    confer  principl establish      forc      oper   nuclear    commit     solut 
    24421     24106     23621     23321     23239     23224     23218     22850     22512 
  respons    action    social   process     power   respect   concern     great      made 
    22473     22472     22233     21940     21921     21897     21776     21679     20917 
     call    cooper    polici      part      issu    negoti  independ    weapon  progress 
    20824     20324     20282     20254     20155     20146     19974     19846     19760 
  african  interest    system contribut   increas implement     today      east       end 
    19671     19401     19352     19318     19192     19175     19155     18887     18872 
secretari agreement territori    remain       arm    recent     posit      live  challeng 
    18709     18389     17662     17623     17336     17177     16949     16771     16616 
     meet   charter    result   resourc      area  democrat     major     adopt     ensur 
    16524     16408     16377     16337     16335     16286     16248     16220     16098 
      law 
    16022 
#Remove unwanted features from the DFM by regular expression (patterns are unanchored
#unless stated, so they match anywhere inside a feature):
#  "[\\d-]"       features containing a digit or a hyphen. The DFM still holds digits
#                 that are part of tokens, not only separated digits.
#  "[[:punct:]]"  features containing punctuation that is part of a token.
#  "^.{1,3}$"     features of one to three characters, i.e. shorter than four.
#The filtered matrix is stored as dfm.m; `dfm` itself is not modified.
dfm.m <- dfm_select(dfm, c("[\\d-]", "[[:punct:]]", "^.{1,3}$"), selection = "remove", 
                    valuetype="regex", verbose = TRUE)
dfm_select removed 3,926 features in 0 documents, padding 0s for 0 features and 0 documents.
#Show the 100 least frequent features of the filtered DFM (decreasing = FALSE)
topfeatures(dfm.m, n = 100, decreasing = FALSE)
       berlintokyo      angloamerican        glasborough sovietczechoslovak               nang 
                 1                  1                  1                  1                  1 
   imperialistisra              laird       americanisra      rogersgromyko         sovietwest 
                 1                  1                  1                  1                  1 
       revengeseek               hich            redivid             xnajor    marxistleninist 
                 1                  1                  1                  1                  1 
       noneuropean           bautista            alberdi              wnidi            vietmam 
                 1                  1                  1                  1                  1 
            snouid       antihitlerit               sqon          nonapplic              gbout 
                 1                  1                  1                  1                  1 
   antiimperialist       pmrticiputlc          largescal           watermil               iust 
                 1                  1                  1                  1                  1 
         clepsydra            materia            uncouth            ipresid             trario 
                 1                  1                  1                  1                  1 
             fella            mildest             misael      aggiornamento         anglosaxon 
                 1                  1                  1                  1                  1 
             ttier            midsumm             perfor              khiem             samdec 
                 1                  1                  1                  1                  1 
        antikorean        phoenixvill       africanasian    americanzionist          noncombat 
                 1                  1                  1                  1                  1 
     briandkellogg           goodrich               jian               dubo             regenc 
                 1                  1                  1                  1                  1 
             perma               nent        montecatini              ibidj            plywood 
                 1                  1                  1                  1                  1 
              sime            ilswllf              komba              preci           peterson 
                 1                  1                  1                  1                  1 
        welldeserv      latinamerican      quaisonsackey             apeopl           dumardai 
                 1                  1                  1                  1                  1 
           auprinc       radhakrishna            scheldt    zionistamerican          intraarab 
                 1                  1                  1                  1                  1 
         henpresid     sovietegyptian             stowel              tarah              eilat 
                 1                  1                  1                  1                  1 
         antisemit               thew            bolzano        nationalrat           trentino 
                 1                  1                  1                  1                  1 
           giusepp            saragat               tbid         powergener   antinorthvietnam 
                 1                  1                  1                  1                  1 
          iifitlim        millipitfnn           vviuiuui          uuiiuyiug              aiula 
                 1                  1                  1                  1                  1 
            uaiiei             iigjjg           annexion           embitter           mountabl 
                 1                  1                  1                  1                  1 
#Apply tf-idf weighting to the DFM.
#NOTE(review): `dfm.trim` is not created anywhere in the code shown above (the filtered
#matrix is `dfm.m`) - confirm it is defined in an earlier, hidden step.
dfm.w <- dfm_weight(dfm.trim, type = "tfidf")

Analysis

Wordscore based positions

One possibility for assessing the positions of EU member states with text data is to use Wordscore – a methodology familiar to political scientists. Here we apply it to UNGD speeches to calculate measures of polarization in the UN on a dimension structured by the main (?) conflictual dimension of international politics – USA vs Russia.

One alternative would be to assess member states’ positions on another dimension of competition – USA vs China. This is more of a trial, as the China-USA dimension may not be salient enough to structure the foreign policy preferences of EU member states. At least it is not salient for now (?), but we should see convergence around 1989/1990.

We can look at the standard deviation of EU positions on these two dimensions. The hypothesis is that there is increasing convergence (lower variation) between EU member states as a result of socialization.

#Logicals for EU member states
#Founding members: flagged as EU members in every year.
EU <- c("BEL", "FRA", "DEU", "ITA", "LUX", "NLD")

#Later members, grouped by enlargement wave. Each value is the cut-off year:
#a country is flagged as a member in every year strictly greater than it.
EU_accession <- c(
  #first wave
  DNK = 1972, IRL = 1972, GBR = 1972,
  #second wave
  GRC = 1980,
  #third wave
  ESP = 1985, PRT = 1985,
  #fourth wave
  AUT = 1994, FIN = 1994, SWE = 1994,
  #fifth wave
  CZE = 2003, HUN = 2003, POL = 2003, EST = 2003, LVA = 2003,
  LTU = 2003, CYP = 2003, MLT = 2003, SVK = 2003, SVN = 2003,
  #sixth wave
  BGR = 2006, ROU = 2006,
  #seventh wave
  HRV = 2012
)

#Add (or overwrite) a logical column `is.eu` marking country-years of EU membership.
#  df        data frame with columns `Country` (country code) and `Year`
#  founders  country codes that are members in every year
#  accession named vector: names are country codes, values are cut-off years
#            (member when Year > cut-off)
#Returns `df` with the `is.eu` column set. Rows whose Country or Year is missing
#are only flagged when the country is a founder, so `is.eu` is never NA.
flag_eu_members <- function(df, founders = EU, accession = EU_accession) {
  country <- as.character(df$Country)
  #NA for countries that are not in the accession table
  cutoff <- unname(accession[country])
  joined <- !is.na(cutoff) & !is.na(df$Year) & df$Year > cutoff
  df$is.eu <- country %in% founders | joined
  df
}

#USA-Russia dimension
rusa <- flag_eu_members(rusa)
#The same for the China-US dimension:
chnusa <- flag_eu_members(chnusa)
#The same for all remaining dimensions:
derus <- flag_eu_members(derus)
deusa <- flag_eu_members(deusa)
luxerus <- flag_eu_members(luxerus)
luxus <- flag_eu_members(luxus)
pal <- flag_eu_members(pal)
presus <- flag_eu_members(presus)
presusalt <- flag_eu_members(presusalt)
presrus <- flag_eu_members(presrus)
presrusalt <- flag_eu_members(presrusalt)

Calculating average scores and standard deviations for EU member states.

#For each dimension: keep EU member rows, then compute the mean and the
#standard deviation of `wscore` within each Year.
eu_rusa <- rusa %>%
  filter(is.eu) %>%
  group_by(Year) %>%
  summarise(score_rusa = mean(wscore), sd_rusa = sd(wscore))
eu_chnusa <- chnusa %>%
  filter(is.eu) %>%
  group_by(Year) %>%
  summarise(score_chn = mean(wscore), sd_chn = sd(wscore))
eu_derus <- derus %>%
  filter(is.eu) %>%
  group_by(Year) %>%
  summarise(score_derus = mean(wscore), sd_derus = sd(wscore))
eu_deusa <- deusa %>%
  filter(is.eu) %>%
  group_by(Year) %>%
  summarise(score_deusa = mean(wscore), sd_deusa = sd(wscore))
eu_luxerus <- luxerus %>%
  filter(is.eu) %>%
  group_by(Year) %>%
  summarise(score_luxerus = mean(wscore), sd_luxerus = sd(wscore))
eu_luxus <- luxus %>%
  filter(is.eu) %>%
  group_by(Year) %>%
  summarise(score_luxus = mean(wscore), sd_luxus = sd(wscore))
eu_pal <- pal %>%
  filter(is.eu) %>%
  group_by(Year) %>%
  summarise(score_pal = mean(wscore), sd_pal = sd(wscore))
eu_presus <- presus %>%
  filter(is.eu) %>%
  group_by(Year) %>%
  summarise(score_presus = mean(wscore), sd_presus = sd(wscore))
eu_presusalt <- presusalt %>%
  filter(is.eu) %>%
  group_by(Year) %>%
  summarise(score_presusalt = mean(wscore), sd_presusalt = sd(wscore))
eu_presrus <- presrus %>%
  filter(is.eu) %>%
  group_by(Year) %>%
  summarise(score_presrus = mean(wscore), sd_presrus = sd(wscore))
eu_presrusalt <- presrusalt %>%
  filter(is.eu) %>%
  group_by(Year) %>%
  summarise(score_presrusalt = mean(wscore), sd_presrusalt = sd(wscore))

#Merge all per-dimension summaries into one table, left-joining on Year in the
#listed order, so the years present in eu_rusa determine the rows of the result.
eu_scores <- Reduce(
  function(joined, nxt) left_join(joined, nxt, by = "Year"),
  list(eu_rusa, eu_chnusa, eu_derus, eu_deusa, eu_luxerus, eu_luxus,
       eu_pal, eu_presus, eu_presusalt, eu_presrus, eu_presrusalt)
)

Plots with wordscore results

Averages

#Yearly EU average (score_rusa) as points with a smoothed trend, no confidence band
ggplot(eu_scores, aes(x = Year, y = score_rusa)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average EU score", title = "USA (+1) - Russia (-1) dimension") +
  theme_bw()
#No plot argument is given, so ggsave() writes the most recent plot
ggsave("avscoreRUSA.pdf")
Saving 7.29 x 4.51 in image

#Yearly EU average (score_chn) as points with a smoothed trend, no confidence band
ggplot(eu_scores, aes(x = Year, y = score_chn)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU", title = "USA (+1) - China (-1) dimension") +
  theme_bw()
#No plot argument is given, so ggsave() writes the most recent plot
ggsave("avscoreCHN.pdf")
Saving 6.5 x 4.02 in image

#Yearly EU average (score_derus) as points with a smoothed trend, no confidence band
ggplot(eu_scores, aes(x = Year, y = score_derus)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU", title = "DEU (+1) - RUS (-1) dimension") +
  theme_bw()
#No plot argument is given, so ggsave() writes the most recent plot
ggsave("avscoreDERUS.pdf")
Saving 6.5 x 4.02 in image

#Yearly EU average (score_deusa) as points with a smoothed trend, no confidence band
ggplot(eu_scores, aes(x = Year, y = score_deusa)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU", title = "USA (+1) - DEU (-1) dimension") +
  theme_bw()
#No plot argument is given, so ggsave() writes the most recent plot
ggsave("avscoreDEUSA.pdf")
Saving 6.5 x 4.02 in image

#Yearly EU average (score_luxerus) as points with a smoothed trend, no confidence band
ggplot(eu_scores, aes(x = Year, y = score_luxerus)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU", title = "LUX (+1) - RUS (-1) dimension") +
  theme_bw()
#No plot argument is given, so ggsave() writes the most recent plot
ggsave("avscoreLUXERUS.pdf")
Saving 6.5 x 4.02 in image

#Yearly EU average (score_luxus) as points with a smoothed trend, no confidence band
ggplot(eu_scores, aes(x = Year, y = score_luxus)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU", title = "USA (+1) - LUX (-1) dimension") +
  theme_bw()
#No plot argument is given, so ggsave() writes the most recent plot
ggsave("avscoreLUXUS.pdf")
Saving 6.5 x 4.02 in image

#Yearly EU average (score_pal) as points with a smoothed trend, no confidence band.
#Only years after 1997 are plotted.
ggplot(subset(eu_scores, Year > 1997), aes(x = Year, y = score_pal)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU", title = "ISR (+1) - PSE (-1) dimension") +
  theme_bw()
#No plot argument is given, so ggsave() writes the most recent plot
ggsave("avscorePAL.pdf")
Saving 6.5 x 4.02 in image

#Yearly EU average (score_presus) as points with a smoothed trend, no confidence band
ggplot(eu_scores, aes(x = Year, y = score_presus)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU", title = "USA (+1) - Presidency (-1) dimension") +
  theme_bw()
#No plot argument is given, so ggsave() writes the most recent plot
ggsave("avscorePRESUS.pdf")
Saving 6.5 x 4.02 in image

#Yearly EU average (score_presusalt) as points with a smoothed trend, no confidence band
ggplot(eu_scores, aes(x = Year, y = score_presusalt)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU", title = "USA (+1) - PresidencyALT (-1) dimension") +
  theme_bw()
#No plot argument is given, so ggsave() writes the most recent plot
ggsave("avscorePRESUSalt.pdf")
Saving 6.5 x 4.02 in image

#Yearly EU average (score_presrus) as points with a smoothed trend, no confidence band
ggplot(eu_scores, aes(x = Year, y = score_presrus)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU", title = "Presidency (+1) - RUS (-1) dimension") +
  theme_bw()
#No plot argument is given, so ggsave() writes the most recent plot
ggsave("avscorePRESRUS.pdf")
Saving 6.5 x 4.02 in image

#Yearly EU average (score_presrusalt) as points with a smoothed trend, no confidence band
ggplot(eu_scores, aes(x = Year, y = score_presrusalt)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU", title = "PresidencyALT (+1) - RUS (-1) dimension") +
  theme_bw()
#No plot argument is given, so ggsave() writes the most recent plot
ggsave("avscorePRESRUSalt.pdf")
Saving 6.5 x 4.02 in image

Standard deviations

# Plot eu_scores$sd_rusa against Year with a smoothed trend line
# (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_rusa)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Standard deviation of wordscores for EU", title = "USA (+1) - Russia (-1) dimension") +
  theme_bw()
# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("sdscoreRUSA.pdf")
Saving 7.29 x 4.51 in image

# Plot eu_scores$sd_chn against Year with a smoothed trend line
# (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_chn)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Standard deviation of wordscores for EU", title = "USA (+1) - China (-1) dimension") +
  theme_bw()
# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("sdscoreCHN.pdf")
Saving 6.5 x 4.02 in image

# Plot eu_scores$sd_derus against Year with a smoothed trend line
# (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_derus)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Standard deviation of wordscores for EU", title = "DEU (+1) - RUS (-1) dimension") +
  theme_bw()
# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("sdscoreDERUS.pdf")
Saving 6.5 x 4.02 in image

# Plot eu_scores$sd_deusa against Year with a smoothed trend line
# (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_deusa)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Standard deviation of wordscores for EU", title = "USA (+1) - DEU (-1) dimension") +
  theme_bw()
# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("sdscoreDEUSA.pdf")
Saving 6.5 x 4.02 in image

# Plot eu_scores$sd_luxerus against Year with a smoothed trend line
# (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_luxerus)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Standard deviation of wordscores for EU", title = "LUX (+1) - RUS (-1) dimension") +
  theme_bw()
# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("sdscoreLUXERUS.pdf")
Saving 6.5 x 4.02 in image

# Plot eu_scores$sd_luxus against Year with a smoothed trend line
# (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_luxus)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Standard deviation of wordscores for EU", title = "USA (+1) - LUX (-1) dimension") +
  theme_bw()
# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("sdscoreLUXUS.pdf")
Saving 6.5 x 4.02 in image

# Plot eu_scores$sd_pal against Year (rows with Year > 1997 only), with a
# smoothed trend line and no confidence band.
ggplot(subset(eu_scores, Year > 1997), aes(x = Year, y = sd_pal)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Standard deviation of wordscores for EU", title = "ISR (+1) - PSE (-1) dimension") +
  theme_bw()
# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("sdscorePAL.pdf")
Saving 6.5 x 4.02 in image

# Plot eu_scores$sd_presus against Year with a smoothed trend line
# (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_presus)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Standard deviation of wordscores for EU", title = "USA (+1) - Presidency (-1) dimension") +
  theme_bw()
# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("sdscorePRESUS.pdf")
Saving 6.5 x 4.02 in image

# Plot eu_scores$sd_presusalt against Year with a smoothed trend line
# (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_presusalt)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Standard deviation of wordscores for EU", title = "USA (+1) - PresidencyALT (-1) dimension") +
  theme_bw()
# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("sdscorePRESUSalt.pdf")
Saving 6.5 x 4.02 in image

# Plot eu_scores$sd_presrus against Year with a smoothed trend line
# (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_presrus)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Standard deviation of wordscores for EU", title = "Presidency (+1) - RUS (-1) dimension") +
  theme_bw()
# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("sdscorePRESRUS.pdf")
Saving 6.5 x 4.02 in image

# Plot eu_scores$sd_presrusalt against Year with a smoothed trend line
# (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_presrusalt)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Standard deviation of wordscores for EU", title = "PresidencyALT (+1) - RUS (-1) dimension") +
  theme_bw()
# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("sdscorePRESRUSalt.pdf")
Saving 6.5 x 4.02 in image

Similarity based socialization

As a direct measure of convergence in foreign policy preferences, we calculate the similarity between each speech and the official EU policy as stated in the speech by the member state holding the EU presidency and, after 2007, in the speech of the EC president. In addition, we calculate similarity to the speech by Luxembourg, which in our interviews came through as the country most closely aligned with the EU position.

We use cosine similarity and run the estimation by year, i.e. we calculate the similarity between each country and the EU presidency/EC/LUX for each session separately.

Similarity with EU presidency

Similarity with EU presidency, but from 2011 similarity is calculated with EC

Combining all similarity estimates into one file

# One row per Country-Year from pres_similarity, with the matching columns of
# pres_similarity_ec and LUX_similarity attached (left joins, so rows that are
# missing from the latter two are kept with NA).
simil_estimates <- pres_similarity %>%
  left_join(pres_similarity_ec, by = c("Country", "Year")) %>%
  left_join(LUX_similarity, by = c("Country", "Year"))

Plot of similarities for EU member states:

# Flag EU membership per Country-Year.
# Countries listed in `EU` are flagged in every year.
simil_estimates <- simil_estimates %>% mutate(is.eu = Country %in% EU)

# Row selector: Country is one of `codes` and Year is later than `last_year_out`.
joined_after <- function(codes, last_year_out) {
  simil_estimates$Country %in% codes & simil_estimates$Year > last_year_out
}

# Later entrants are flagged only for the years after `last_year_out`.
# first wave
simil_estimates$is.eu[joined_after(c("DNK", "IRL", "GBR"), 1972)] <- TRUE
# second wave
simil_estimates$is.eu[joined_after("GRC", 1980)] <- TRUE
# third wave
simil_estimates$is.eu[joined_after(c("ESP", "PRT"), 1985)] <- TRUE
# fourth wave
simil_estimates$is.eu[joined_after(c("AUT", "FIN", "SWE"), 1994)] <- TRUE
# fifth wave
simil_estimates$is.eu[joined_after(c("CZE", "HUN", "POL", "EST", "LVA", "LTU",
                                     "CYP", "MLT", "SVK", "SVN"), 2003)] <- TRUE
# sixth wave
simil_estimates$is.eu[joined_after(c("BGR", "ROU"), 2006)] <- TRUE
# seventh wave
simil_estimates$is.eu[joined_after("HRV", 2012)] <- TRUE

Creating averages and standard deviations for EU member states.

# Yearly mean and standard deviation of each similarity measure across the
# rows flagged as EU members.
# Only the LUX statistics drop missing values; a missing PRES or PRES_ec value
# in a year makes that year's statistic NA.
eu_sim <- simil_estimates %>%
  filter(is.eu) %>%
  group_by(Year) %>%
  summarise(
    simil_pres = mean(PRES),
    sd_simil_pres = sd(PRES),
    simil_pres_ec = mean(PRES_ec),
    sd_simil_pres_ec = sd(PRES_ec),
    simil_lux = mean(LUX, na.rm = TRUE),
    sd_simil_lux = sd(LUX, na.rm = TRUE)
  )

Plotting averages and standard deviations of similarities

# Yearly average similarities in one figure:
#   blue = simil_lux, green = simil_pres, red = simil_pres_ec.
# NOTE(review): only the blue and red series get a text label; the green
# series (simil_pres) is unlabelled -- confirm this is intended.
ggplot(eu_sim, aes(x = Year)) +
  geom_point(aes(y = simil_lux), colour = "blue", na.rm = TRUE) +
  geom_smooth(aes(y = simil_lux), colour = "blue", se = FALSE, na.rm = TRUE) +
  geom_point(aes(y = simil_pres), colour = "green") +
  geom_smooth(aes(y = simil_pres), colour = "green", se = FALSE) +
  geom_point(aes(y = simil_pres_ec), colour = "red") +
  geom_smooth(aes(y = simil_pres_ec), colour = "red", se = FALSE) +
  labs(y = "Average EU similarities") +
  annotate("text", x = 1983, y = 0.12, label = "Similarity with Luxembourg", colour = "blue") +
  annotate("text", x = 2000, y = 0.22, label = "Similarity with presidency", colour = "red") +
  theme_bw()
# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("avsimil.pdf")
Saving 7.29 x 4.51 in image

# Yearly SDs of the similarities: green = sd_simil_pres, red = sd_simil_pres_ec.
# The Luxembourg layers are disabled in this figure and kept as comments.
ggplot(eu_sim, aes(x = Year)) +
  # geom_point(aes(y = sd_simil_lux), colour = "blue", na.rm = TRUE) +
  # geom_smooth(aes(y = sd_simil_lux), colour = "blue", se = FALSE, na.rm = TRUE) +
  geom_point(aes(y = sd_simil_pres), colour = "green") +
  geom_smooth(aes(y = sd_simil_pres), colour = "green", se = FALSE) +
  geom_point(aes(y = sd_simil_pres_ec), colour = "red") +
  geom_smooth(aes(y = sd_simil_pres_ec), colour = "red", se = FALSE) +
  labs(y = "SDs of EU similarities") +
  # annotate("text", x = 2000, y = 0.03, label = "Similarity with Luxembourg", colour = "blue") +
  annotate("text", x = 1990, y = 0.075, label = "Similarity with presidency", colour = "red") +
  theme_bw()
# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("sdsimil.pdf")
Saving 7.29 x 4.51 in image

Similarity with Luxembourg

# Yearly SD of the similarity with Luxembourg (sd_simil_lux) on its own.
# The presidency layers are disabled in this figure and kept as comments.
ggplot(eu_sim, aes(x = Year)) +
  geom_point(aes(y = sd_simil_lux), colour = "blue", na.rm = TRUE) +
  geom_smooth(aes(y = sd_simil_lux), colour = "blue", se = FALSE, na.rm = TRUE) +
  # geom_point(aes(y = sd_simil_pres), colour = "green") +
  # geom_smooth(aes(y = sd_simil_pres), colour = "green", se = FALSE) +
  # geom_point(aes(y = sd_simil_pres_ec), colour = "red") +
  # geom_smooth(aes(y = sd_simil_pres_ec), colour = "red", se = FALSE) +
  labs(y = "SDs of EU similarities") +
  annotate("text", x = 2000, y = 0.03, label = "Similarity with Luxembourg", colour = "blue") +
  # annotate("text", x = 1990, y = 0.075, label = "Similarity with presidency", colour = "red") +
  theme_bw()
# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("sdsimillux.pdf")
Saving 7.29 x 4.51 in image

Combining files for output:

# Year-level file: similarity summaries with the wordscore summaries attached.
readr::write_csv(left_join(eu_sim, eu_scores, by = "Year"), "eu_estimates.csv")

# Give the 4th column of each wordscore table a dimension-specific name so the
# tables can be joined side by side.
names(presus)[4] <- "PRESUS_wscore"
names(presusalt)[4] <- "PRESUS_alt_wscore"
names(presrus)[4] <- "PRESRUS_wscore"
names(presrusalt)[4] <- "PRESRUS_alt_wscore"
names(derus)[4] <- "DERUS_wscore"
names(deusa)[4] <- "DEUSA_wscore"
names(luxus)[4] <- "LUXUS_wscore"
names(luxerus)[4] <- "LUXERUS_wscore"
names(pal)[4] <- "PAL_wscore"
names(rusa)[4] <- "RUSA_wscore"
names(chnusa)[4] <- "CHNUSA_wscore"

# Full-join all wordscore tables on Country and Year, left to right.
scores <- Reduce(
  function(joined, nxt) full_join(joined, nxt, by = c("Country", "Year")),
  list(presus, presusalt, presrus, presrusalt, derus, deusa,
       luxus, luxerus, pal, rusa, chnusa)
)
simil_scores <- full_join(scores, simil_estimates, by = c("Country", "Year"))

# Country-year file: sorted by Country then Year, keeping only the identifier,
# similarity and wordscore columns.
readr::write_csv(
  simil_scores[order(simil_scores$Country, simil_scores$Year), ] %>%
    select(c(Country, Year, PRES, PRES_ec, LUX, RUSA_wscore, CHNUSA_wscore,
             PRESUS_wscore, PRESUS_alt_wscore, PRESRUS_wscore,
             PRESRUS_alt_wscore, DERUS_wscore, DEUSA_wscore, LUXUS_wscore,
             LUXERUS_wscore, PAL_wscore)),
  "estimates.csv"
)

Splines analysis

library(splines)

# Natural cubic spline in Year (interior knots 1986, 1993, 1999, 2009) fitted
# to the yearly SD of EU wordscores in eu_scores$sd_rusa.
spl1 <- lm(sd_rusa ~ ns(Year, knots = c(1986, 1993, 1999, 2009)), data = eu_scores)

# `se.fit` is spelled out instead of relying on partial matching of `se`.
# Predicting on `newdata = eu_scores` returns one value per row of eu_scores,
# so the band stays aligned with the plotted rows even if lm() dropped rows
# with a missing response.
pred1 <- predict(spl1, newdata = eu_scores, se.fit = TRUE)

# Fitted line and +/- 2 standard-error band as columns, so aes() only refers
# to variables of the plotted data frame.
spl1_band <- data.frame(
  Year = eu_scores$Year,
  sd_rusa = eu_scores$sd_rusa,
  fit = unname(pred1$fit),
  lower = unname(pred1$fit - 2 * pred1$se.fit),
  upper = unname(pred1$fit + 2 * pred1$se.fit)
)

ggplot(spl1_band, aes(Year, sd_rusa)) +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "grey70", alpha = .5) +
  geom_point() +
  geom_line(aes(y = fit)) +
  theme_bw() +
  ylab("Standard deviation of wordscores for EU") +
  ggtitle("Splines for USA-Russia dimension")
# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("sdRUSAspline.pdf")
Saving 7.29 x 4.51 in image

# Natural cubic spline in Year (interior knots 1986, 1993, 1999, 2009) fitted
# to the yearly SD of EU wordscores in eu_scores$sd_chn.
spl2 <- lm(sd_chn ~ ns(Year, knots = c(1986, 1993, 1999, 2009)), data = eu_scores)

# `se.fit` is spelled out instead of relying on partial matching of `se`.
# Predicting on `newdata = eu_scores` returns one value per row of eu_scores,
# so the band stays aligned with the plotted rows even if lm() dropped rows
# with a missing response.
pred2 <- predict(spl2, newdata = eu_scores, se.fit = TRUE)

# Fitted line and +/- 2 standard-error band as columns, so aes() only refers
# to variables of the plotted data frame.
spl2_band <- data.frame(
  Year = eu_scores$Year,
  sd_chn = eu_scores$sd_chn,
  fit = unname(pred2$fit),
  lower = unname(pred2$fit - 2 * pred2$se.fit),
  upper = unname(pred2$fit + 2 * pred2$se.fit)
)

ggplot(spl2_band, aes(Year, sd_chn)) +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "grey70", alpha = .5) +
  geom_point() +
  geom_line(aes(y = fit)) +
  theme_bw() +
  ylab("Standard deviation of wordscores for EU") +
  ggtitle("Splines for USA-China dimension")
# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("sdCHNspline.pdf")
Saving 7.29 x 4.51 in image

# Natural cubic spline in Year (interior knots 1986, 1993, 1999, 2009) fitted
# to the yearly SD of similarities in eu_sim$sd_simil_pres.
spl3 <- lm(sd_simil_pres ~ ns(Year, knots = c(1986, 1993, 1999, 2009)), data = eu_sim)

# `se.fit` is spelled out instead of relying on partial matching of `se`.
# Predicting on `newdata = eu_sim` returns one value per row of eu_sim, so the
# band stays aligned with the plotted rows even if lm() dropped rows with a
# missing response.
pred3 <- predict(spl3, newdata = eu_sim, se.fit = TRUE)

# Fitted line and +/- 2 standard-error band as columns, so aes() only refers
# to variables of the plotted data frame.
spl3_band <- data.frame(
  Year = eu_sim$Year,
  sd_simil_pres = eu_sim$sd_simil_pres,
  fit = unname(pred3$fit),
  lower = unname(pred3$fit - 2 * pred3$se.fit),
  upper = unname(pred3$fit + 2 * pred3$se.fit)
)

ggplot(spl3_band, aes(Year, sd_simil_pres)) +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "grey70", alpha = .5) +
  geom_point() +
  geom_line(aes(y = fit)) +
  theme_bw()

# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("sdsimilspline.pdf")
# Natural cubic spline in Year (interior knots 1986, 1993, 1999, 2009) fitted
# to the yearly SD of similarities in eu_sim$sd_simil_pres_ec.
spl4 <- lm(sd_simil_pres_ec ~ ns(Year, knots = c(1986, 1993, 1999, 2009)), data = eu_sim)

# `se.fit` is spelled out instead of relying on partial matching of `se`.
# Predicting on `newdata = eu_sim` returns one value per row of eu_sim, so the
# band stays aligned with the plotted rows even if lm() dropped rows with a
# missing response.
pred4 <- predict(spl4, newdata = eu_sim, se.fit = TRUE)

# Fitted line and +/- 2 standard-error band as columns, so aes() only refers
# to variables of the plotted data frame.
spl4_band <- data.frame(
  Year = eu_sim$Year,
  sd_simil_pres_ec = eu_sim$sd_simil_pres_ec,
  fit = unname(pred4$fit),
  lower = unname(pred4$fit - 2 * pred4$se.fit),
  upper = unname(pred4$fit + 2 * pred4$se.fit)
)

ggplot(spl4_band, aes(Year, sd_simil_pres_ec)) +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "grey70", alpha = .5) +
  geom_point() +
  geom_line(aes(y = fit)) +
  theme_bw()

# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("sdsimilecspline.pdf")
library(tidyr)

# Years without a Luxembourg SD are removed before fitting and plotting.
lux_sim <- eu_sim %>% drop_na(sd_simil_lux)

# Natural cubic spline in Year (interior knots 1986, 1993, 1999, 2009) fitted
# to the yearly SD of similarities in lux_sim$sd_simil_lux.
spl5 <- lm(sd_simil_lux ~ ns(Year, knots = c(1986, 1993, 1999, 2009)), data = lux_sim)

# `se.fit` is spelled out instead of relying on partial matching of `se`;
# `newdata = lux_sim` returns one prediction per plotted row.
pred5 <- predict(spl5, newdata = lux_sim, se.fit = TRUE)

# Fitted line and +/- 2 standard-error band as columns, so aes() only refers
# to variables of the plotted data frame.
spl5_band <- data.frame(
  Year = lux_sim$Year,
  sd_simil_lux = lux_sim$sd_simil_lux,
  fit = unname(pred5$fit),
  lower = unname(pred5$fit - 2 * pred5$se.fit),
  upper = unname(pred5$fit + 2 * pred5$se.fit)
)

ggplot(spl5_band, aes(Year, sd_simil_lux)) +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "grey70", alpha = .5) +
  geom_point() +
  geom_line(aes(y = fit)) +
  theme_bw()

# Without a `plot` argument ggsave() writes the last plot displayed.
ggsave("sdsimilluxspline.pdf")

The coefficients here are essentially slopes of lines for each segment.

# Regression tables for the spline models: a console table and a LaTeX table
# for spl1-spl4, then a separate LaTeX table for spl5.
# NOTE(review): the five non-intercept coefficients belong to the ns() basis
# functions; labelling them with calendar periods assumes a one-to-one mapping
# between basis functions and knot intervals -- confirm before publishing.
spline_coef_labels <- c("Intercept", "1970-1986", "1987-1993", "1994-1999",
                        "2000-2009", "2010-present day")
spline_coef_order <- c(2, 3, 4, 5, 6, 1)  # intercept moved to the last row
spline_star_levels <- c(0.001, 0.01, 0.05)
spline_model_labels <- c("SD EU on RUSA", "SD EU on CHNUSA", "SD Simil Pres",
                         "SD Simil Pres (EC) ")

screenreg(list(spl1, spl2, spl3, spl4),
          digits = 3, bold = 0.05, stars = spline_star_levels,
          reorder.coef = spline_coef_order,
          custom.coef.names = spline_coef_labels,
          custom.model.names = spline_model_labels)
texreg(list(spl1, spl2, spl3, spl4),
       digits = 3, bold = 0.05, stars = spline_star_levels,
       reorder.coef = spline_coef_order,
       custom.coef.names = spline_coef_labels,
       custom.model.names = spline_model_labels)
texreg(spl5,
       digits = 3, bold = 0.05, stars = spline_star_levels,
       reorder.coef = spline_coef_order,
       custom.coef.names = spline_coef_labels,
       custom.model.names = "SD Simil Lux")

References

---
title: "The Socialization Effect of EU Membership on Foreign Policy Preferences: Evidence from Debates in the United Nations"
author:
- affiliation: University of Love
  email: n.chelotti@lboro.ac.uk
  name: Nicola Chelotti
- affiliation: University of Birmingham
  email: n.dasandi@bham.ac.uk
  name: Niheer Dasandi
- affiliation: University of Essex
  email: s.mikhaylov@essex.ac.uk
  name: Slava Mikhaylov
date: 20 May 2017
output:
  html_notebook:
    toc: yes
  html_document: default
  pdf_document: 
    toc: yes
  word_document: 
    toc: yes
#biblio-style: apsr
#bibliography: eu.bib
thanks: Paper prepared for presentation at the 75th Annual MPSA Meeting, 6-9 April
  2017, Chicago.
abstract: Does membership of intergovernmental organizations lead to more similar state preferences through a socialization effect? This question has received much attention in IR. Empirical studies focusing on the EU, claim EU membership leads to foreign policy convergence based on analyzing UN voting patterns. We argue the significant coordination between EU member states when voting means voting cohesion demonstrates effective coordination, not a socialization effect. To examine whether EU membership has a socialization effect on member states, we use a new dataset of UN General Debate (GD) statements. Every year, UN member states discuss their perspectives on major international issues in the GD. The lack of coordination and external constraints in delivering GD statements makes them ideal for testing socialization effects on preferences. Interviews with UN representatives of EU members support our argument that there is significant coordination between EU delegations on UN votes, but not in formulating GD statements. We derive estimates of states' foreign policy preferences from GD statements using text analytic techniques, and examine the effect of EU membership and engagement on preferences using these new measures.
---


# Introduction

Here goes introduction text


# Theory?

Here goes theory. Do we need theory?

# Data

Basic description of the data.



```{r, message=FALSE, include=FALSE}
#Loading packages and data
library(readtext)
library(quanteda)
library(dplyr)
library(stringr)
library(ggplot2)
library(rworldmap)
library(RColorBrewer)
library(classInt)
library(vegan)
library(boot)
library(haven)
library(readxl)
library(texreg)
```

```{r warning=FALSE, include=FALSE}

# Install readtext from GitHub when it is not installed.
# requireNamespace() only checks availability; it does not attach the package.
if (!requireNamespace("readtext", quietly = TRUE)) devtools::install_github("kbenoit/readtext")

DATA_DIR <- "~/Dropbox/Research/UN Data/"

# Read every file under "Converted sessions"; the file name is split on "_"
# into the document variables Country, Session and Year.
ungd_files <- readtext(paste0(DATA_DIR, "Converted sessions/*"),
                       docvarsfrom = "filenames",
                       dvsep = "_",
                       docvarnames = c("Country", "Session", "Year"))


# Change row.names to country_year only, rather than the folder pathway from
# `readtext`: take the second "/"-separated component, drop the literal ".txt"
# (matched as a fixed string, not a regex) and drop the first "_" + two digits.
row.names(ungd_files) <- row.names(ungd_files) %>%
  str_split("/") %>%
  vapply(`[`, character(1), 2) %>%
  str_replace(fixed(".txt"), "") %>%
  str_replace("_\\d{2}", "")

# Install quanteda from GitHub when it is not installed.
if (!requireNamespace("quanteda", quietly = TRUE)) devtools::install_github("kbenoit/quanteda")

ungd_corpus <- corpus(ungd_files, text_field = "text")

```

Creating document feature matrix.

```{r, include=FALSE}
#Tokenization and basic pre-processing
# Split the whole corpus into word tokens, dropping punctuation, symbols,
# numbers, Twitter characters, URLs and hyphens along the way.
tok <- tokens(
  ungd_corpus,
  what = "word",
  removePunct = TRUE, removeSymbols = TRUE, removeNumbers = TRUE,
  removeTwitter = TRUE, removeURL = TRUE, removeHyphens = TRUE,
  verbose = TRUE
)
```

```{r dfm}
# Build the document-feature matrix from the tokens: lower-case, remove the
# SMART stopword list and stem the remaining features.
# The result is stored under the name `dfm`, the same name as the function.
dfm <- dfm(
  tok,
  tolower = TRUE,
  remove = stopwords("SMART"),
  stem = TRUE,
  verbose = TRUE
)

# The 100 most frequent features in the DFM
topfeatures(dfm, n = 100)

```


```{r}
# Remove features matching any of three regular expressions:
#   "[\\d-]"      - contains a digit or a hyphen
#   "[[:punct:]]" - contains a punctuation character
#   "^.{1,3}$"    - is one to three characters long
dfm.m <- dfm_select(
  dfm,
  c("[\\d-]", "[[:punct:]]", "^.{1,3}$"),
  selection = "remove",
  valuetype = "regex",
  verbose = TRUE
)

# The 100 least frequent features that remain
topfeatures(dfm.m, n = 100, decreasing = FALSE)
```


```{r, include=FALSE}
# Keep only features that occur at least 5 times overall and in at least
# 3 documents (a feature failing either threshold is dropped).
dfm.trim <- dfm.m %>% dfm_trim(min_count = 5, min_docfreq = 3)

# The 100 least frequent features of the trimmed DFM
topfeatures(dfm.trim, n = 100, decreasing = FALSE)

# Sparsity of the trimmed DFM
sparsity(dfm.trim)

# Number of features left in the trimmed DFM
nfeature(dfm.trim)
```



```{r tfidf}

# Re-weight the trimmed DFM with the "tfidf" scheme of dfm_weight().
dfm.w <- dfm.trim %>% dfm_weight(type = "tfidf")

```




# Analysis


### Wordscore based positions
One possibility for assessing the positions of EU member states with text data is to use Wordscore -- a methodology familiar to political scientists. Here we apply it to UNGD speeches to calculate measures of polarization in the UN on a dimension structured by the main (?) conflictual dimension of international politics -- USA vs Russia. 


```{r wordscoreRUSA, include=FALSE}
# Wordscore estimation, one year at a time.
#
# For every year in `years` the speeches of that year are taken from
# `ungd_corpus`, tokenised, turned into a stemmed, stopword-free, tf-idf
# weighted document-feature matrix, and scored with a Wordscores model whose
# reference texts are the documents whose names match `negative_ref` (score -1)
# and `positive_ref` (score +1).
#
# Arguments:
#   years        - vector of years, compared with the `Year` document variable.
#   negative_ref - pattern (as used by str_detect) for the -1 reference texts.
#   positive_ref - pattern (as used by str_detect) for the +1 reference texts.
#
# Returns a data frame with the document variables of every scored speech plus
# the raw text score in column `wscore`, stacked over all years in the order
# given. An empty `years` gives an empty data frame.
estimate_wordscores <- function(years, negative_ref, positive_ref) {
  per_year <- lapply(years, function(yr) {
    # Speeches delivered in year `yr`
    ungdc_year <- corpus_subset(ungd_corpus, Year == yr)

    toks <- tokens(ungdc_year, what = "word",
                   removePunct = TRUE,
                   removeSymbols = TRUE,
                   removeNumbers = TRUE,
                   removeTwitter = TRUE,
                   removeURL = TRUE,
                   removeHyphens = TRUE,
                   verbose = TRUE)

    dfm_year <- dfm(toks,
                    tolower = TRUE,
                    remove = stopwords("SMART"),
                    stem = TRUE,
                    verbose = TRUE)

    # Remove features that contain a digit or hyphen, contain punctuation, or
    # are shorter than four characters.
    dfm_year <- dfm_select(dfm_year, c("[\\d-]", "[[:punct:]]", "^.{1,3}$"),
                           selection = "remove", valuetype = "regex",
                           verbose = TRUE)

    # Keep features that occur at least 5 times and in at least 3 documents.
    dfm_year <- dfm_trim(dfm_year, min_count = 5, min_docfreq = 3)

    # tfidf weighting
    dfm_year <- quanteda::dfm_weight(dfm_year, type = "tfidf")

    # Reference scores: NA for every document except the two reference texts.
    refscores <- rep(NA, nrow(dfm_year))
    refscores[str_detect(rownames(dfm_year), negative_ref)] <- -1
    refscores[str_detect(rownames(dfm_year), positive_ref)] <- 1

    # Wordscore model and raw (unrescaled) text scores
    ws <- textmodel_wordscores(dfm_year, refscores, scale = "linear", smooth = 1)
    scored <- predict(ws, rescaling = "none")

    data.frame(cbind(docvars(ungdc_year),
                     wscore = scored@textscores$textscore_raw))
  })

  # Stack the yearly tables once instead of growing a data frame in a loop;
  # the leading empty data frame keeps the result a data frame when `years`
  # is empty.
  do.call(rbind, c(list(data.frame()), per_year))
}

# USA (+1) - RUS (-1)
rusa <- estimate_wordscores(1971:2016, negative_ref = "RUS", positive_ref = "USA")
```

One alternative would be to assess member states' positions on another dimension of competition -- USA vs China. This is more a trial as China-USA dimension may not be salient enough to structure foreign policy preferences of EU member states. At least not salient for now (?), but we should see convergence around 1989/1990.

```{r wordscoreCHNUSA, include=FALSE}
# USA (+1) - CHN (-1)
chnusa <- estimate_wordscores(1971:2016, negative_ref = "CHN", positive_ref = "USA")
```



```{r wordscoreDEUSA, include=FALSE}
# USA (+1) - DEU (-1), years 1973-2016
deusa <- estimate_wordscores(1973:2016, negative_ref = "DEU", positive_ref = "USA")
```





```{r GermanyRussia, include=FALSE}
# DEU (+1) - RUS (-1), years 1973-2016
derus <- estimate_wordscores(1973:2016, negative_ref = "RUS", positive_ref = "DEU")
```




```{r wordscoreLUXERUS, include=FALSE}
# LUX (+1) - RUS (-1); 1975 and 1976 are not estimated
luxerus <- estimate_wordscores(c(1971:1974, 1977:2016),
                               negative_ref = "RUS", positive_ref = "LUX")
```






```{r wordscoreLUXUS, include=FALSE}
# USA (+1) - LUX (-1); 1975 and 1976 are not estimated
luxus <- estimate_wordscores(c(1971:1974, 1977:2016),
                             negative_ref = "LUX", positive_ref = "USA")
```





```{r pal, include=FALSE}
# ISR (+1) - PSE (-1), years 1998-2016
pal <- estimate_wordscores(1998:2016, negative_ref = "PSE", positive_ref = "ISR")
```


```{r EU president, include=FALSE}
# Presidency table read from an Excel workbook (path relative to the working
# directory). Later chunks use its `Year`, `Country` and `Country_alt` columns.
presidency <- readxl::read_excel(path = "presidency.xlsx")

```



```{r PresidencyUSA, include=FALSE}

# Wordscores on the USA (+1) - presidency (-1) dimension, estimated separately
# for each year from 1970 to 2016. The -1 reference is the country given in
# `presidency$Country` for that year. The result `presus` stacks the document
# variables of the speeches with their raw text score in the column `wscore`.
presus_years <- 1970:2016

# One slot per year. The yearly pieces are bound together once after the loop
# instead of growing `presus` with rbind() on every iteration.
presus_parts <- vector("list", length(presus_years))

for (i in presus_years) {
  # Sub-corpus with the speeches of year i
  ungdc.i <- corpus_subset(ungd_corpus, Year == i)

  tok <- tokens(ungdc.i, what = "word",
                removePunct = TRUE,
                removeSymbols = TRUE,
                removeNumbers = TRUE,
                removeTwitter = TRUE,
                removeURL = TRUE,
                removeHyphens = TRUE,
                verbose = TRUE)

  # Document-feature matrix: lower-cased, SMART stopwords removed, stemmed
  dfm <- dfm(tok,
             tolower = TRUE,
             remove = stopwords("SMART"),
             stem = TRUE,
             verbose = TRUE)

  # Remove every feature that contains a digit or a hyphen, contains
  # punctuation, or is only one to three characters long.
  dfm.m <- dfm_select(dfm, c("[\\d-]", "[[:punct:]]", "^.{1,3}$"),
                      selection = "remove", valuetype = "regex",
                      verbose = TRUE)

  # Keep only features that occur at least 5 times overall and in at least
  # 3 documents.
  dfm.trim <- dfm_trim(dfm.m, min_count = 5, min_docfreq = 3)

  # tf-idf weighting
  dfm.w <- dfm_weight(dfm.trim, type = "tfidf")

  # Document name(s) of the presidency speech in year i ("<Country>_<Year>")
  pres <- paste(presidency$Country[presidency$Year == i], i, sep = "_")

  # Reference scores: NA for every text except the two references.
  # %in% (rather than ==) stays correct if `pres` is not exactly length one.
  refscores <- rep(NA, nrow(dfm.w))
  refscores[rownames(dfm.w) %in% pres] <- -1
  refscores[str_detect(rownames(dfm.w), "USA")] <- 1

  # Without a text at both ends the scale for this year is not anchored;
  # flag it instead of silently producing scores.
  if (!any(refscores %in% -1) || !any(refscores %in% 1)) {
    warning("Year ", i, ": no reference text found for ",
            paste(pres, collapse = ", "), " and/or USA; ",
            "wordscores for this year are not anchored at both ends.",
            call. = FALSE)
  }

  # Wordscore model
  ws <- textmodel_wordscores(dfm.w, refscores, scale = "linear", smooth = 1)
  wordscore <- predict(ws, rescaling = "none")

  # Document variables plus the raw text score, named `wscore` directly
  wordscores.i <- data.frame(cbind(docvars(ungdc.i),
                                   wscore = wordscore@textscores$textscore_raw))

  presus_parts[[match(i, presus_years)]] <- wordscores.i
}

presus <- do.call(rbind, presus_parts)

```




```{r PresidencyALTusa, include=FALSE}
# Wordscores on the USA (+1) - alternative presidency (-1) dimension,
# estimated separately for each year from 1970 to 2016. The -1 reference is
# the country given in `presidency$Country_alt` for that year. The result
# `presusalt` stacks the document variables of the speeches with their raw
# text score in the column `wscore`.
presusalt_years <- 1970:2016

# One slot per year. The yearly pieces are bound together once after the loop
# instead of growing `presusalt` with rbind() on every iteration.
presusalt_parts <- vector("list", length(presusalt_years))

for (i in presusalt_years) {
  # Sub-corpus with the speeches of year i
  ungdc.i <- corpus_subset(ungd_corpus, Year == i)

  tok <- tokens(ungdc.i, what = "word",
                removePunct = TRUE,
                removeSymbols = TRUE,
                removeNumbers = TRUE,
                removeTwitter = TRUE,
                removeURL = TRUE,
                removeHyphens = TRUE,
                verbose = TRUE)

  # Document-feature matrix: lower-cased, SMART stopwords removed, stemmed
  dfm <- dfm(tok,
             tolower = TRUE,
             remove = stopwords("SMART"),
             stem = TRUE,
             verbose = TRUE)

  # Remove every feature that contains a digit or a hyphen, contains
  # punctuation, or is only one to three characters long.
  dfm.m <- dfm_select(dfm, c("[\\d-]", "[[:punct:]]", "^.{1,3}$"),
                      selection = "remove", valuetype = "regex",
                      verbose = TRUE)

  # Keep only features that occur at least 5 times overall and in at least
  # 3 documents.
  dfm.trim <- dfm_trim(dfm.m, min_count = 5, min_docfreq = 3)

  # tf-idf weighting
  dfm.w <- dfm_weight(dfm.trim, type = "tfidf")

  # Document name(s) of the alternative presidency speech in year i
  # ("<Country>_<Year>")
  pres_alt <- paste(presidency$Country_alt[presidency$Year == i], i, sep = "_")

  # Reference scores: NA for every text except the two references.
  # %in% (rather than ==) stays correct if `pres_alt` is not exactly length one.
  refscores <- rep(NA, nrow(dfm.w))
  refscores[rownames(dfm.w) %in% pres_alt] <- -1
  refscores[str_detect(rownames(dfm.w), "USA")] <- 1

  # Without a text at both ends the scale for this year is not anchored;
  # flag it instead of silently producing scores.
  if (!any(refscores %in% -1) || !any(refscores %in% 1)) {
    warning("Year ", i, ": no reference text found for ",
            paste(pres_alt, collapse = ", "), " and/or USA; ",
            "wordscores for this year are not anchored at both ends.",
            call. = FALSE)
  }

  # Wordscore model
  ws <- textmodel_wordscores(dfm.w, refscores, scale = "linear", smooth = 1)
  wordscore <- predict(ws, rescaling = "none")

  # Document variables plus the raw text score, named `wscore` directly
  wordscores.i <- data.frame(cbind(docvars(ungdc.i),
                                   wscore = wordscore@textscores$textscore_raw))

  presusalt_parts[[match(i, presusalt_years)]] <- wordscores.i
}

presusalt <- do.call(rbind, presusalt_parts)

```





```{r PresidencyRussia, include=FALSE}
# Wordscores on the presidency (+1) - RUS (-1) dimension, estimated separately
# for each year from 1971 to 2016. The +1 reference is the country given in
# `presidency$Country` for that year. The result `presrus` stacks the document
# variables of the speeches with their raw text score in the column `wscore`.
presrus_years <- 1971:2016

# One slot per year. The yearly pieces are bound together once after the loop
# instead of growing `presrus` with rbind() on every iteration.
presrus_parts <- vector("list", length(presrus_years))

for (i in presrus_years) {
  # Sub-corpus with the speeches of year i
  ungdc.i <- corpus_subset(ungd_corpus, Year == i)

  tok <- tokens(ungdc.i, what = "word",
                removePunct = TRUE,
                removeSymbols = TRUE,
                removeNumbers = TRUE,
                removeTwitter = TRUE,
                removeURL = TRUE,
                removeHyphens = TRUE,
                verbose = TRUE)

  # Document-feature matrix: lower-cased, SMART stopwords removed, stemmed
  dfm <- dfm(tok,
             tolower = TRUE,
             remove = stopwords("SMART"),
             stem = TRUE,
             verbose = TRUE)

  # Remove every feature that contains a digit or a hyphen, contains
  # punctuation, or is only one to three characters long.
  dfm.m <- dfm_select(dfm, c("[\\d-]", "[[:punct:]]", "^.{1,3}$"),
                      selection = "remove", valuetype = "regex",
                      verbose = TRUE)

  # Keep only features that occur at least 5 times overall and in at least
  # 3 documents.
  dfm.trim <- dfm_trim(dfm.m, min_count = 5, min_docfreq = 3)

  # tf-idf weighting
  dfm.w <- dfm_weight(dfm.trim, type = "tfidf")

  # Document name(s) of the presidency speech in year i ("<Country>_<Year>")
  pres <- paste(presidency$Country[presidency$Year == i], i, sep = "_")

  # Reference scores: NA for every text except the two references.
  # %in% (rather than ==) stays correct if `pres` is not exactly length one.
  refscores <- rep(NA, nrow(dfm.w))
  refscores[str_detect(rownames(dfm.w), "RUS")] <- -1
  refscores[rownames(dfm.w) %in% pres] <- 1

  # Without a text at both ends the scale for this year is not anchored;
  # flag it instead of silently producing scores.
  if (!any(refscores %in% -1) || !any(refscores %in% 1)) {
    warning("Year ", i, ": no reference text found for RUS and/or ",
            paste(pres, collapse = ", "), "; ",
            "wordscores for this year are not anchored at both ends.",
            call. = FALSE)
  }

  # Wordscore model
  ws <- textmodel_wordscores(dfm.w, refscores, scale = "linear", smooth = 1)
  wordscore <- predict(ws, rescaling = "none")

  # Document variables plus the raw text score, named `wscore` directly
  wordscores.i <- data.frame(cbind(docvars(ungdc.i),
                                   wscore = wordscore@textscores$textscore_raw))

  presrus_parts[[match(i, presrus_years)]] <- wordscores.i
}

presrus <- do.call(rbind, presrus_parts)

```




```{r PresidencyALTrus, include=FALSE}
# Wordscores on the alternative presidency (+1) - RUS (-1) dimension,
# estimated separately for each year from 1971 to 2016. The +1 reference is
# the country given in `presidency$Country_alt` for that year. The result
# `presrusalt` stacks the document variables of the speeches with their raw
# text score in the column `wscore`.
presrusalt_years <- 1971:2016

# One slot per year. The yearly pieces are bound together once after the loop
# instead of growing `presrusalt` with rbind() on every iteration.
presrusalt_parts <- vector("list", length(presrusalt_years))

for (i in presrusalt_years) {
  # Sub-corpus with the speeches of year i
  ungdc.i <- corpus_subset(ungd_corpus, Year == i)

  tok <- tokens(ungdc.i, what = "word",
                removePunct = TRUE,
                removeSymbols = TRUE,
                removeNumbers = TRUE,
                removeTwitter = TRUE,
                removeURL = TRUE,
                removeHyphens = TRUE,
                verbose = TRUE)

  # Document-feature matrix: lower-cased, SMART stopwords removed, stemmed
  dfm <- dfm(tok,
             tolower = TRUE,
             remove = stopwords("SMART"),
             stem = TRUE,
             verbose = TRUE)

  # Remove every feature that contains a digit or a hyphen, contains
  # punctuation, or is only one to three characters long.
  dfm.m <- dfm_select(dfm, c("[\\d-]", "[[:punct:]]", "^.{1,3}$"),
                      selection = "remove", valuetype = "regex",
                      verbose = TRUE)

  # Keep only features that occur at least 5 times overall and in at least
  # 3 documents.
  dfm.trim <- dfm_trim(dfm.m, min_count = 5, min_docfreq = 3)

  # tf-idf weighting
  dfm.w <- dfm_weight(dfm.trim, type = "tfidf")

  # Document name(s) of the alternative presidency speech in year i
  # ("<Country>_<Year>")
  pres_alt <- paste(presidency$Country_alt[presidency$Year == i], i, sep = "_")

  # Reference scores: NA for every text except the two references.
  # %in% (rather than ==) stays correct if `pres_alt` is not exactly length one.
  refscores <- rep(NA, nrow(dfm.w))
  refscores[str_detect(rownames(dfm.w), "RUS")] <- -1
  refscores[rownames(dfm.w) %in% pres_alt] <- 1

  # Without a text at both ends the scale for this year is not anchored;
  # flag it instead of silently producing scores.
  if (!any(refscores %in% -1) || !any(refscores %in% 1)) {
    warning("Year ", i, ": no reference text found for RUS and/or ",
            paste(pres_alt, collapse = ", "), "; ",
            "wordscores for this year are not anchored at both ends.",
            call. = FALSE)
  }

  # Wordscore model
  ws <- textmodel_wordscores(dfm.w, refscores, scale = "linear", smooth = 1)
  wordscore <- predict(ws, rescaling = "none")

  # Document variables plus the raw text score, named `wscore` directly
  wordscores.i <- data.frame(cbind(docvars(ungdc.i),
                                   wscore = wordscore@textscores$textscore_raw))

  presrusalt_parts[[match(i, presrusalt_years)]] <- wordscores.i
}

presrusalt <- do.call(rbind, presrusalt_parts)

```



We can look at the standard deviation of EU positions on these two dimensions. The hypothesis is that there's increasing convergence (lower variation) between EU member states as a result of socialization.  

```{r}
# Three-letter codes of the countries treated as EU members in every year.
# Countries that joined later are flagged year by year in the chunks below.
EU <- c(
  "BEL", "FRA", "DEU",
  "ITA", "LUX", "NLD"
)
```


```{r}
# Add a logical `is.eu` column to `rusa`. Countries listed in `EU` are members
# in every year; each later entrant is a member in every year greater than the
# cutoff year stored for it below.
joined_after <- c(
  DNK = 1972, IRL = 1972, GBR = 1972,              # first wave
  GRC = 1980,                                      # second wave
  ESP = 1985, PRT = 1985,                          # third wave
  AUT = 1994, FIN = 1994, SWE = 1994,              # fourth wave
  CZE = 2003, HUN = 2003, POL = 2003, EST = 2003,  # fifth wave
  LVA = 2003, LTU = 2003, CYP = 2003, MLT = 2003,
  SVK = 2003, SVN = 2003,
  BGR = 2006, ROU = 2006,                          # sixth wave
  HRV = 2012                                       # seventh wave
)

rusa <- mutate(rusa, is.eu = Country %in% EU)

# Countries without an entry get an NA cutoff; which() skips those rows.
cutoff <- unname(joined_after[as.character(rusa$Country)])
rusa$is.eu[which(rusa$Year > cutoff)] <- TRUE
```


```{r}
# Add a logical `is.eu` column to `chnusa` (China-US dimension). Countries
# listed in `EU` are members in every year; each later entrant is a member in
# every year greater than the cutoff year stored for it below.
joined_after <- c(
  DNK = 1972, IRL = 1972, GBR = 1972,              # first wave
  GRC = 1980,                                      # second wave
  ESP = 1985, PRT = 1985,                          # third wave
  AUT = 1994, FIN = 1994, SWE = 1994,              # fourth wave
  CZE = 2003, HUN = 2003, POL = 2003, EST = 2003,  # fifth wave
  LVA = 2003, LTU = 2003, CYP = 2003, MLT = 2003,
  SVK = 2003, SVN = 2003,
  BGR = 2006, ROU = 2006,                          # sixth wave
  HRV = 2012                                       # seventh wave
)

chnusa <- mutate(chnusa, is.eu = Country %in% EU)

# Countries without an entry get an NA cutoff; which() skips those rows.
cutoff <- unname(joined_after[as.character(chnusa$Country)])
chnusa$is.eu[which(chnusa$Year > cutoff)] <- TRUE
```


```{r}
# Add a logical `is.eu` column to `derus`. Countries listed in `EU` are
# members in every year; each later entrant is a member in every year greater
# than the cutoff year stored for it below.
joined_after <- c(
  DNK = 1972, IRL = 1972, GBR = 1972,              # first wave
  GRC = 1980,                                      # second wave
  ESP = 1985, PRT = 1985,                          # third wave
  AUT = 1994, FIN = 1994, SWE = 1994,              # fourth wave
  CZE = 2003, HUN = 2003, POL = 2003, EST = 2003,  # fifth wave
  LVA = 2003, LTU = 2003, CYP = 2003, MLT = 2003,
  SVK = 2003, SVN = 2003,
  BGR = 2006, ROU = 2006,                          # sixth wave
  HRV = 2012                                       # seventh wave
)

derus <- mutate(derus, is.eu = Country %in% EU)

# Countries without an entry get an NA cutoff; which() skips those rows.
cutoff <- unname(joined_after[as.character(derus$Country)])
derus$is.eu[which(derus$Year > cutoff)] <- TRUE
```



```{r}
# Add a logical `is.eu` column to `deusa`. Countries listed in `EU` are
# members in every year; each later entrant is a member in every year greater
# than the cutoff year stored for it below.
joined_after <- c(
  DNK = 1972, IRL = 1972, GBR = 1972,              # first wave
  GRC = 1980,                                      # second wave
  ESP = 1985, PRT = 1985,                          # third wave
  AUT = 1994, FIN = 1994, SWE = 1994,              # fourth wave
  CZE = 2003, HUN = 2003, POL = 2003, EST = 2003,  # fifth wave
  LVA = 2003, LTU = 2003, CYP = 2003, MLT = 2003,
  SVK = 2003, SVN = 2003,
  BGR = 2006, ROU = 2006,                          # sixth wave
  HRV = 2012                                       # seventh wave
)

deusa <- mutate(deusa, is.eu = Country %in% EU)

# Countries without an entry get an NA cutoff; which() skips those rows.
cutoff <- unname(joined_after[as.character(deusa$Country)])
deusa$is.eu[which(deusa$Year > cutoff)] <- TRUE

```


```{r}
# Add a logical `is.eu` column to `luxerus`. Countries listed in `EU` are
# members in every year; each later entrant is a member in every year greater
# than the cutoff year stored for it below.
joined_after <- c(
  DNK = 1972, IRL = 1972, GBR = 1972,              # first wave
  GRC = 1980,                                      # second wave
  ESP = 1985, PRT = 1985,                          # third wave
  AUT = 1994, FIN = 1994, SWE = 1994,              # fourth wave
  CZE = 2003, HUN = 2003, POL = 2003, EST = 2003,  # fifth wave
  LVA = 2003, LTU = 2003, CYP = 2003, MLT = 2003,
  SVK = 2003, SVN = 2003,
  BGR = 2006, ROU = 2006,                          # sixth wave
  HRV = 2012                                       # seventh wave
)

luxerus <- mutate(luxerus, is.eu = Country %in% EU)

# Countries without an entry get an NA cutoff; which() skips those rows.
cutoff <- unname(joined_after[as.character(luxerus$Country)])
luxerus$is.eu[which(luxerus$Year > cutoff)] <- TRUE

```


```{r}
# Add a logical `is.eu` column to `luxus`. Countries listed in `EU` are
# members in every year; each later entrant is a member in every year greater
# than the cutoff year stored for it below.
joined_after <- c(
  DNK = 1972, IRL = 1972, GBR = 1972,              # first wave
  GRC = 1980,                                      # second wave
  ESP = 1985, PRT = 1985,                          # third wave
  AUT = 1994, FIN = 1994, SWE = 1994,              # fourth wave
  CZE = 2003, HUN = 2003, POL = 2003, EST = 2003,  # fifth wave
  LVA = 2003, LTU = 2003, CYP = 2003, MLT = 2003,
  SVK = 2003, SVN = 2003,
  BGR = 2006, ROU = 2006,                          # sixth wave
  HRV = 2012                                       # seventh wave
)

luxus <- mutate(luxus, is.eu = Country %in% EU)

# Countries without an entry get an NA cutoff; which() skips those rows.
cutoff <- unname(joined_after[as.character(luxus$Country)])
luxus$is.eu[which(luxus$Year > cutoff)] <- TRUE
```


```{r}
# Add a logical `is.eu` column to `pal`. Countries listed in `EU` are members
# in every year; each later entrant is a member in every year greater than the
# cutoff year stored for it below.
joined_after <- c(
  DNK = 1972, IRL = 1972, GBR = 1972,              # first wave
  GRC = 1980,                                      # second wave
  ESP = 1985, PRT = 1985,                          # third wave
  AUT = 1994, FIN = 1994, SWE = 1994,              # fourth wave
  CZE = 2003, HUN = 2003, POL = 2003, EST = 2003,  # fifth wave
  LVA = 2003, LTU = 2003, CYP = 2003, MLT = 2003,
  SVK = 2003, SVN = 2003,
  BGR = 2006, ROU = 2006,                          # sixth wave
  HRV = 2012                                       # seventh wave
)

pal <- mutate(pal, is.eu = Country %in% EU)

# Countries without an entry get an NA cutoff; which() skips those rows.
cutoff <- unname(joined_after[as.character(pal$Country)])
pal$is.eu[which(pal$Year > cutoff)] <- TRUE

```


```{r}
# Add a logical `is.eu` column to `presus`. Countries listed in `EU` are
# members in every year; each later entrant is a member in every year greater
# than the cutoff year stored for it below.
joined_after <- c(
  DNK = 1972, IRL = 1972, GBR = 1972,              # first wave
  GRC = 1980,                                      # second wave
  ESP = 1985, PRT = 1985,                          # third wave
  AUT = 1994, FIN = 1994, SWE = 1994,              # fourth wave
  CZE = 2003, HUN = 2003, POL = 2003, EST = 2003,  # fifth wave
  LVA = 2003, LTU = 2003, CYP = 2003, MLT = 2003,
  SVK = 2003, SVN = 2003,
  BGR = 2006, ROU = 2006,                          # sixth wave
  HRV = 2012                                       # seventh wave
)

presus <- mutate(presus, is.eu = Country %in% EU)

# Countries without an entry get an NA cutoff; which() skips those rows.
cutoff <- unname(joined_after[as.character(presus$Country)])
presus$is.eu[which(presus$Year > cutoff)] <- TRUE
```


```{r}
# Add a logical `is.eu` column to `presusalt`. Countries listed in `EU` are
# members in every year; each later entrant is a member in every year greater
# than the cutoff year stored for it below.
joined_after <- c(
  DNK = 1972, IRL = 1972, GBR = 1972,              # first wave
  GRC = 1980,                                      # second wave
  ESP = 1985, PRT = 1985,                          # third wave
  AUT = 1994, FIN = 1994, SWE = 1994,              # fourth wave
  CZE = 2003, HUN = 2003, POL = 2003, EST = 2003,  # fifth wave
  LVA = 2003, LTU = 2003, CYP = 2003, MLT = 2003,
  SVK = 2003, SVN = 2003,
  BGR = 2006, ROU = 2006,                          # sixth wave
  HRV = 2012                                       # seventh wave
)

presusalt <- mutate(presusalt, is.eu = Country %in% EU)

# Countries without an entry get an NA cutoff; which() skips those rows.
cutoff <- unname(joined_after[as.character(presusalt$Country)])
presusalt$is.eu[which(presusalt$Year > cutoff)] <- TRUE
```


```{r}
# Add a logical `is.eu` column to `presrus`. Countries listed in `EU` are
# members in every year; each later entrant is a member in every year greater
# than the cutoff year stored for it below.
joined_after <- c(
  DNK = 1972, IRL = 1972, GBR = 1972,              # first wave
  GRC = 1980,                                      # second wave
  ESP = 1985, PRT = 1985,                          # third wave
  AUT = 1994, FIN = 1994, SWE = 1994,              # fourth wave
  CZE = 2003, HUN = 2003, POL = 2003, EST = 2003,  # fifth wave
  LVA = 2003, LTU = 2003, CYP = 2003, MLT = 2003,
  SVK = 2003, SVN = 2003,
  BGR = 2006, ROU = 2006,                          # sixth wave
  HRV = 2012                                       # seventh wave
)

presrus <- mutate(presrus, is.eu = Country %in% EU)

# Countries without an entry get an NA cutoff; which() skips those rows.
cutoff <- unname(joined_after[as.character(presrus$Country)])
presrus$is.eu[which(presrus$Year > cutoff)] <- TRUE
```


```{r}
# Add a logical `is.eu` column to `presrusalt`. Countries listed in `EU` are
# members in every year; each later entrant is a member in every year greater
# than the cutoff year stored for it below.
joined_after <- c(
  DNK = 1972, IRL = 1972, GBR = 1972,              # first wave
  GRC = 1980,                                      # second wave
  ESP = 1985, PRT = 1985,                          # third wave
  AUT = 1994, FIN = 1994, SWE = 1994,              # fourth wave
  CZE = 2003, HUN = 2003, POL = 2003, EST = 2003,  # fifth wave
  LVA = 2003, LTU = 2003, CYP = 2003, MLT = 2003,
  SVK = 2003, SVN = 2003,
  BGR = 2006, ROU = 2006,                          # sixth wave
  HRV = 2012                                       # seventh wave
)

presrusalt <- mutate(presrusalt, is.eu = Country %in% EU)

# Countries without an entry get an NA cutoff; which() skips those rows.
cutoff <- unname(joined_after[as.character(presrusalt$Country)])
presrusalt$is.eu[which(presrusalt$Year > cutoff)] <- TRUE
```



Calculating average scores and standard deviations for EU member states.

```{r}

# For every dimension: keep the EU member rows, group them by year, and take
# the mean and the standard deviation of the wordscores.
eu_rusa <- rusa %>%
  filter(is.eu == TRUE) %>%
  group_by(Year) %>%
  summarise(score_rusa = mean(wscore), sd_rusa = sd(wscore))

eu_chnusa <- chnusa %>%
  filter(is.eu == TRUE) %>%
  group_by(Year) %>%
  summarise(score_chn = mean(wscore), sd_chn = sd(wscore))

eu_derus <- derus %>%
  filter(is.eu == TRUE) %>%
  group_by(Year) %>%
  summarise(score_derus = mean(wscore), sd_derus = sd(wscore))

eu_deusa <- deusa %>%
  filter(is.eu == TRUE) %>%
  group_by(Year) %>%
  summarise(score_deusa = mean(wscore), sd_deusa = sd(wscore))

eu_luxerus <- luxerus %>%
  filter(is.eu == TRUE) %>%
  group_by(Year) %>%
  summarise(score_luxerus = mean(wscore), sd_luxerus = sd(wscore))

eu_luxus <- luxus %>%
  filter(is.eu == TRUE) %>%
  group_by(Year) %>%
  summarise(score_luxus = mean(wscore), sd_luxus = sd(wscore))

eu_pal <- pal %>%
  filter(is.eu == TRUE) %>%
  group_by(Year) %>%
  summarise(score_pal = mean(wscore), sd_pal = sd(wscore))

eu_presus <- presus %>%
  filter(is.eu == TRUE) %>%
  group_by(Year) %>%
  summarise(score_presus = mean(wscore), sd_presus = sd(wscore))

eu_presusalt <- presusalt %>%
  filter(is.eu == TRUE) %>%
  group_by(Year) %>%
  summarise(score_presusalt = mean(wscore), sd_presusalt = sd(wscore))

eu_presrus <- presrus %>%
  filter(is.eu == TRUE) %>%
  group_by(Year) %>%
  summarise(score_presrus = mean(wscore), sd_presrus = sd(wscore))

eu_presrusalt <- presrusalt %>%
  filter(is.eu == TRUE) %>%
  group_by(Year) %>%
  summarise(score_presrusalt = mean(wscore), sd_presrusalt = sd(wscore))

# One wide table keyed by Year. Everything is left-joined onto eu_rusa, so the
# rows of the result are the years present in eu_rusa.
eu_scores <- eu_rusa %>%
  left_join(eu_chnusa, by = "Year") %>%
  left_join(eu_derus, by = "Year") %>%
  left_join(eu_deusa, by = "Year") %>%
  left_join(eu_luxerus, by = "Year") %>%
  left_join(eu_luxus, by = "Year") %>%
  left_join(eu_pal, by = "Year") %>%
  left_join(eu_presus, by = "Year") %>%
  left_join(eu_presusalt, by = "Year") %>%
  left_join(eu_presrus, by = "Year") %>%
  left_join(eu_presrusalt, by = "Year")


```





### Plots with wordscore results

#### Averages

```{r}
# Yearly EU mean wordscore on the USA-Russia dimension: points plus a
# smoothed trend line.
ggplot(eu_scores, aes(x = Year, y = score_rusa)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU",
       title = "USA (+1) - Russia (-1) dimension") +
  theme_bw()

# No plot argument is given, so ggsave writes the most recent plot.
ggsave(filename = "avscoreRUSA.pdf")
```


```{r}
# Yearly EU mean wordscore on the USA-China dimension: points plus a
# smoothed trend line.
ggplot(eu_scores, aes(x = Year, y = score_chn)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU",
       title = "USA (+1) - China (-1) dimension") +
  theme_bw()

# No plot argument is given, so ggsave writes the most recent plot.
ggsave(filename = "avscoreCHN.pdf")
```



```{r}
# Yearly EU mean wordscore on the DEU-RUS dimension: points plus a
# smoothed trend line.
ggplot(eu_scores, aes(x = Year, y = score_derus)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU",
       title = "DEU (+1) - RUS (-1) dimension") +
  theme_bw()

# No plot argument is given, so ggsave writes the most recent plot.
ggsave(filename = "avscoreDERUS.pdf")
```




```{r}
# Yearly EU mean wordscore on the USA-DEU dimension: points plus a
# smoothed trend line.
ggplot(eu_scores, aes(x = Year, y = score_deusa)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU",
       title = "USA (+1) - DEU (-1) dimension") +
  theme_bw()

# No plot argument is given, so ggsave writes the most recent plot.
ggsave(filename = "avscoreDEUSA.pdf")
```


```{r}
# Yearly EU mean wordscore on the LUX-RUS dimension: points plus a
# smoothed trend line.
ggplot(eu_scores, aes(x = Year, y = score_luxerus)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU",
       title = "LUX (+1) - RUS (-1) dimension") +
  theme_bw()

# No plot argument is given, so ggsave writes the most recent plot.
ggsave(filename = "avscoreLUXERUS.pdf")
```

```{r}
# Yearly EU mean wordscore on the USA-LUX dimension: points plus a
# smoothed trend line.
ggplot(eu_scores, aes(x = Year, y = score_luxus)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU",
       title = "USA (+1) - LUX (-1) dimension") +
  theme_bw()

# No plot argument is given, so ggsave writes the most recent plot.
ggsave(filename = "avscoreLUXUS.pdf")
```


```{r}
# Yearly EU mean wordscore on the ISR-PSE dimension: points plus a
# smoothed trend line. Only years after 1997 are plotted.
ggplot(subset(eu_scores, Year > 1997), aes(x = Year, y = score_pal)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU",
       title = "ISR (+1) - PSE (-1) dimension") +
  theme_bw()

# No plot argument is given, so ggsave writes the most recent plot.
ggsave(filename = "avscorePAL.pdf")
```


```{r}
# Yearly EU mean wordscore on the USA-Presidency dimension: points plus a
# smoothed trend line.
ggplot(eu_scores, aes(x = Year, y = score_presus)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU",
       title = "USA (+1) - Presidency (-1) dimension") +
  theme_bw()

# No plot argument is given, so ggsave writes the most recent plot.
ggsave(filename = "avscorePRESUS.pdf")
```


```{r}
# Yearly EU mean wordscore on the USA-PresidencyALT dimension: points plus a
# smoothed trend line.
ggplot(eu_scores, aes(x = Year, y = score_presusalt)) +
  geom_point(colour = "blue", na.rm = TRUE) +
  geom_smooth(colour = "blue", se = FALSE) +
  labs(y = "Average wordscore for EU",
       title = "USA (+1) - PresidencyALT (-1) dimension") +
  theme_bw()

# No plot argument is given, so ggsave writes the most recent plot.
ggsave(filename = "avscorePRESUSalt.pdf")
```



```{r}
# Average EU wordscore per year on the Presidency-RUS dimension: yearly
# points plus a smoothed trend line (no confidence band).
ggplot(eu_scores, aes(x = Year, y = score_presrus)) +
  geom_point(na.rm = TRUE, colour = "blue") +
  geom_smooth(se = FALSE, colour = "blue") +
  labs(y = "Average wordscore for EU",
       title = "Presidency (+1) - RUS (-1) dimension") +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("avscorePRESRUS.pdf")
```



```{r}
# Average EU wordscore per year on the PresidencyALT-RUS dimension: yearly
# points plus a smoothed trend line (no confidence band).
ggplot(eu_scores, aes(x = Year, y = score_presrusalt)) +
  geom_point(na.rm = TRUE, colour = "blue") +
  geom_smooth(se = FALSE, colour = "blue") +
  labs(y = "Average wordscore for EU",
       title = "PresidencyALT (+1) - RUS (-1) dimension") +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("avscorePRESRUSalt.pdf")
```



#### Standard deviations


```{r}
# Standard deviation of EU wordscores per year on the USA-Russia dimension:
# yearly points plus a smoothed trend line (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_rusa)) +
  geom_point(na.rm = TRUE, colour = "blue") +
  geom_smooth(se = FALSE, colour = "blue") +
  labs(y = "Standard deviation of wordscores for EU",
       title = "USA (+1) - Russia (-1) dimension") +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("sdscoreRUSA.pdf")
```


```{r}
# Standard deviation of EU wordscores per year on the USA-China dimension:
# yearly points plus a smoothed trend line (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_chn)) +
  geom_point(na.rm = TRUE, colour = "blue") +
  geom_smooth(se = FALSE, colour = "blue") +
  labs(y = "Standard deviation of wordscores for EU",
       title = "USA (+1) - China (-1) dimension") +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("sdscoreCHN.pdf")
```



```{r}
# Standard deviation of EU wordscores per year on the DEU-RUS dimension:
# yearly points plus a smoothed trend line (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_derus)) +
  geom_point(na.rm = TRUE, colour = "blue") +
  geom_smooth(se = FALSE, colour = "blue") +
  labs(y = "Standard deviation of wordscores for EU",
       title = "DEU (+1) - RUS (-1) dimension") +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("sdscoreDERUS.pdf")
```



```{r}
# Standard deviation of EU wordscores per year on the USA-DEU dimension:
# yearly points plus a smoothed trend line (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_deusa)) +
  geom_point(na.rm = TRUE, colour = "blue") +
  geom_smooth(se = FALSE, colour = "blue") +
  labs(y = "Standard deviation of wordscores for EU",
       title = "USA (+1) - DEU (-1) dimension") +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("sdscoreDEUSA.pdf")
```



```{r}
# Standard deviation of EU wordscores per year on the LUX-RUS dimension:
# yearly points plus a smoothed trend line (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_luxerus)) +
  geom_point(na.rm = TRUE, colour = "blue") +
  geom_smooth(se = FALSE, colour = "blue") +
  labs(y = "Standard deviation of wordscores for EU",
       title = "LUX (+1) - RUS (-1) dimension") +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("sdscoreLUXERUS.pdf")
```



```{r}
# Standard deviation of EU wordscores per year on the USA-LUX dimension:
# yearly points plus a smoothed trend line (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_luxus)) +
  geom_point(na.rm = TRUE, colour = "blue") +
  geom_smooth(se = FALSE, colour = "blue") +
  labs(y = "Standard deviation of wordscores for EU",
       title = "USA (+1) - LUX (-1) dimension") +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("sdscoreLUXUS.pdf")
```



```{r}
# Standard deviation of EU wordscores per year on the ISR-PSE dimension.
# Only years after 1997 are plotted.
ggplot(subset(eu_scores, Year>1997), aes(x = Year, y = sd_pal)) +
  geom_point(na.rm = TRUE, colour = "blue") +
  geom_smooth(se = FALSE, colour = "blue") +
  labs(y = "Standard deviation of wordscores for EU",
       title = "ISR (+1) - PSE (-1) dimension") +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("sdscorePAL.pdf")
```


```{r}
# Standard deviation of EU wordscores per year on the USA-Presidency
# dimension: yearly points plus a smoothed trend line (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_presus)) +
  geom_point(na.rm = TRUE, colour = "blue") +
  geom_smooth(se = FALSE, colour = "blue") +
  labs(y = "Standard deviation of wordscores for EU",
       title = "USA (+1) - Presidency (-1) dimension") +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("sdscorePRESUS.pdf")
```


```{r}
# Standard deviation of EU wordscores per year on the USA-PresidencyALT
# dimension: yearly points plus a smoothed trend line (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_presusalt)) +
  geom_point(na.rm = TRUE, colour = "blue") +
  geom_smooth(se = FALSE, colour = "blue") +
  labs(y = "Standard deviation of wordscores for EU",
       title = "USA (+1) - PresidencyALT (-1) dimension") +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("sdscorePRESUSalt.pdf")
```

```{r}
# Standard deviation of EU wordscores per year on the Presidency-RUS
# dimension: yearly points plus a smoothed trend line (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_presrus)) +
  geom_point(na.rm = TRUE, colour = "blue") +
  geom_smooth(se = FALSE, colour = "blue") +
  labs(y = "Standard deviation of wordscores for EU",
       title = "Presidency (+1) - RUS (-1) dimension") +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("sdscorePRESRUS.pdf")
```


```{r}
# Standard deviation of EU wordscores per year on the PresidencyALT-RUS
# dimension: yearly points plus a smoothed trend line (no confidence band).
ggplot(eu_scores, aes(x = Year, y = sd_presrusalt)) +
  geom_point(na.rm = TRUE, colour = "blue") +
  geom_smooth(se = FALSE, colour = "blue") +
  labs(y = "Standard deviation of wordscores for EU",
       title = "PresidencyALT (+1) - RUS (-1) dimension") +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("sdscorePRESRUSalt.pdf")
```




### Similarity based socialization

As a direct measure of convergence in foreign policy preferences, we calculate the similarity between each speech and the official EU policy as stated in speeches by the presiding EU member state and, after 2007, the speech of the EC president. In addition, we calculate the similarity to the speech by Luxembourg, which in our interviews came through as the country most in line with the EU position. 

We use cosine similarity and run the estimation by year, i.e. we calculate the similarity between each country and the EU presidency/EC/LUX for each session separately.


```{r, include=FALSE}

# Cosine similarity between every speech and Luxembourg's speech, estimated
# separately for each session (year).
# The year vector omits 1970, 1975 and 1976 -- presumably sessions without a
# Luxembourg speech in the corpus; TODO confirm.
lux_years <- c(1971:1974, 1977:2016)

# One data frame per year is collected here and bound together once after the
# loop, instead of growing a data frame with rbind() on every iteration.
similarity_by_year <- vector("list", length(lux_years))

for (k in seq_along(lux_years)) {
  i <- lux_years[k]

  # Corpus of all speeches delivered in year i
  ungdc.i <- corpus_subset(ungd_corpus, Year == i)

  tok <- tokens(ungdc.i, what = "word",
                removePunct = TRUE,
                removeSymbols = TRUE,
                removeNumbers = TRUE,
                removeTwitter = TRUE,
                removeURL = TRUE,
                removeHyphens = TRUE,
                verbose = TRUE)

  # DFM creation from tokens, removing stopwords, and stemming.
  dfm <- dfm(tok,
             tolower = TRUE,
             remove = stopwords("SMART"),
             stem = TRUE,
             verbose = TRUE)

  # Removing features that contain a digit or hyphen, features that contain
  # punctuation, and features shorter than four characters.
  dfm.m <- dfm_select(dfm, c("[\\d-]", "[[:punct:]]", "^.{1,3}$"),
                      selection = "remove",
                      valuetype = "regex", verbose = TRUE)

  # Keeping only words that appear at least 5 times overall and in at least
  # 3 documents (a word failing either threshold is dropped).
  dfm.trim <- dfm_trim(dfm.m, min_count = 5, min_docfreq = 3)

  # tfidf weighting
  dfm.w <- dfm_weight(dfm.trim, type = "tfidf")

  # Document name of Luxembourg's speech in year i. paste0() has no `sep`
  # argument, so none is passed.
  lux <- paste0("LUX_", i)

  # Cosine similarity of every document in year i to Luxembourg's speech;
  # the result is a one-column data frame with document names as row names.
  lux_simil <- textstat_simil(dfm.w, lux,
                              margin = "documents",
                              method = "cosine")
  similarities.i <- as.data.frame(as.list(lux_simil, sorted = FALSE)[1])

  names(similarities.i)[1] <- "LUX"

  similarity_by_year[[k]] <- similarities.i
}

# Stack the yearly results; the list is unnamed, so row names (document
# names) are carried over unchanged.
similarity <- do.call(rbind, similarity_by_year)

# Row names are split on "_" into the country code and the year.
LUX_similarity <- cbind(str_split(row.names(similarity), "_", simplify = TRUE),
                        similarity)

names(LUX_similarity)[1] <- "Country"
names(LUX_similarity)[2] <- "Year"

LUX_similarity$Year <- as.numeric(as.character(LUX_similarity$Year))
LUX_similarity$Country <- as.character(LUX_similarity$Country)

```


Similarity with EU presidency


```{r, include=FALSE}
# Cosine similarity between every speech and the speech of the country
# holding the EU presidency, estimated separately for each session (year).
pres_years <- 1970:2016

# One data frame per year is collected here and bound together once after the
# loop, instead of growing a data frame with rbind() on every iteration.
pres_by_year <- vector("list", length(pres_years))

for (k in seq_along(pres_years)) {
  i <- pres_years[k]

  # Corpus of all speeches delivered in year i
  ungdc.i <- corpus_subset(ungd_corpus, Year == i)

  tok <- tokens(ungdc.i, what = "word",
                removePunct = TRUE,
                removeSymbols = TRUE,
                removeNumbers = TRUE,
                removeTwitter = TRUE,
                removeURL = TRUE,
                removeHyphens = TRUE,
                verbose = TRUE)

  # DFM creation from tokens, removing stopwords, and stemming.
  dfm <- dfm(tok,
             tolower = TRUE,
             remove = stopwords("SMART"),
             stem = TRUE,
             verbose = TRUE)

  # Removing features that contain a digit or hyphen, features that contain
  # punctuation, and features shorter than four characters.
  dfm.m <- dfm_select(dfm, c("[\\d-]", "[[:punct:]]", "^.{1,3}$"),
                      selection = "remove",
                      valuetype = "regex", verbose = TRUE)

  # Keeping only words that appear at least 5 times overall and in at least
  # 3 documents (a word failing either threshold is dropped).
  dfm.trim <- dfm_trim(dfm.m, min_count = 5, min_docfreq = 3)

  # tfidf weighting
  dfm.w <- dfm_weight(dfm.trim, type = "tfidf")

  # Document name of the presiding country's speech in year i, built from
  # the `presidency` lookup table.
  pres <- paste(presidency$Country[presidency$Year == i], i, sep = "_")

  # Cosine similarity of every document in year i to that speech; only the
  # first element of the resulting list is kept, giving a one-column data
  # frame with document names as row names.
  pres_simil <- textstat_simil(dfm.w, pres,
                               margin = "documents",
                               method = "cosine")
  similarities.i <- as.data.frame(as.list(pres_simil, sorted = FALSE)[1])

  names(similarities.i)[1] <- "PRES"

  pres_by_year[[k]] <- similarities.i
}

# Stack the yearly results; the list is unnamed, so row names (document
# names) are carried over unchanged.
pres_similarity <- do.call(rbind, pres_by_year)

# Row names are split on "_" into the country code and the year.
pres_similarity <- cbind(str_split(row.names(pres_similarity), "_",
                                   simplify = TRUE),
                         pres_similarity)

names(pres_similarity)[1] <- "Country"
names(pres_similarity)[2] <- "Year"

pres_similarity$Year <- as.numeric(as.character(pres_similarity$Year))
pres_similarity$Country <- as.character(pres_similarity$Country)

```


Similarity with EU presidency, but from 2011 similarity is calculated with EC


```{r, include=FALSE}
# Cosine similarity between every speech and the reference speech named in
# presidency$Country_alt, estimated separately for each session (year).
# NOTE(review): per the surrounding text, Country_alt is the presiding country
# with the EC substituted from 2011 onwards -- confirm against `presidency`.
pres_ec_years <- 1970:2016

# One data frame per year is collected here and bound together once after the
# loop, instead of growing a data frame with rbind() on every iteration.
pres_ec_by_year <- vector("list", length(pres_ec_years))

for (k in seq_along(pres_ec_years)) {
  i <- pres_ec_years[k]

  # Corpus of all speeches delivered in year i
  ungdc.i <- corpus_subset(ungd_corpus, Year == i)

  tok <- tokens(ungdc.i, what = "word",
                removePunct = TRUE,
                removeSymbols = TRUE,
                removeNumbers = TRUE,
                removeTwitter = TRUE,
                removeURL = TRUE,
                removeHyphens = TRUE,
                verbose = TRUE)

  # DFM creation from tokens, removing stopwords, and stemming.
  dfm <- dfm(tok,
             tolower = TRUE,
             remove = stopwords("SMART"),
             stem = TRUE,
             verbose = TRUE)

  # Removing features that contain a digit or hyphen, features that contain
  # punctuation, and features shorter than four characters.
  dfm.m <- dfm_select(dfm, c("[\\d-]", "[[:punct:]]", "^.{1,3}$"),
                      selection = "remove",
                      valuetype = "regex", verbose = TRUE)

  # Keeping only words that appear at least 5 times overall and in at least
  # 3 documents (a word failing either threshold is dropped).
  dfm.trim <- dfm_trim(dfm.m, min_count = 5, min_docfreq = 3)

  # tfidf weighting
  dfm.w <- dfm_weight(dfm.trim, type = "tfidf")

  # Document name of the reference speech in year i, built from the
  # `presidency` lookup table (alternative coding).
  pres <- paste(presidency$Country_alt[presidency$Year == i], i, sep = "_")

  # Cosine similarity of every document in year i to that speech; only the
  # first element of the resulting list is kept, giving a one-column data
  # frame with document names as row names.
  pres_ec_simil <- textstat_simil(dfm.w, pres,
                                  margin = "documents",
                                  method = "cosine")
  similarities.i <- as.data.frame(as.list(pres_ec_simil, sorted = FALSE)[1])

  names(similarities.i)[1] <- "PRES_ec"

  pres_ec_by_year[[k]] <- similarities.i
}

# Stack the yearly results; the list is unnamed, so row names (document
# names) are carried over unchanged.
pres_similarity_ec <- do.call(rbind, pres_ec_by_year)

# Row names are split on "_" into the country code and the year.
pres_similarity_ec <- cbind(str_split(row.names(pres_similarity_ec), "_",
                                      simplify = TRUE),
                            pres_similarity_ec)

names(pres_similarity_ec)[1] <- "Country"
names(pres_similarity_ec)[2] <- "Year"

pres_similarity_ec$Year <- as.numeric(as.character(pres_similarity_ec$Year))
pres_similarity_ec$Country <- as.character(pres_similarity_ec$Country)

```


Combining all similarity estimates into one file


```{r}
# One row per country-year from the presidency estimates, with the
# presidency/EC and Luxembourg similarities attached where available.
simil_estimates <- pres_similarity %>%
  left_join(pres_similarity_ec, by = c("Country", "Year")) %>%
  left_join(LUX_similarity, by = c("Country", "Year"))
```


Plot of similarities for EU member states:


```{r}
# Logical flag for EU member states.
# Countries listed in `EU` are flagged for every year.
simil_estimates <- mutate(simil_estimates, is.eu = Country %in% EU)

# Later entrants are flagged only for years strictly after the cut-off year
# given here, grouped by enlargement wave.
last_year_outside <- c(
  # first wave
  DNK = 1972, IRL = 1972, GBR = 1972,
  # second wave
  GRC = 1980,
  # third wave
  ESP = 1985, PRT = 1985,
  # fourth wave
  AUT = 1994, FIN = 1994, SWE = 1994,
  # fifth wave
  CZE = 2003, HUN = 2003, POL = 2003, EST = 2003, LVA = 2003,
  LTU = 2003, CYP = 2003, MLT = 2003, SVK = 2003, SVN = 2003,
  # sixth wave
  BGR = 2006, ROU = 2006,
  # seventh wave
  HRV = 2012
)

for (member in names(last_year_outside)) {
  joined <- simil_estimates$Country == member &
    simil_estimates$Year > last_year_outside[[member]]
  simil_estimates$is.eu[joined] <- TRUE
}
```


Creating averages and standard deviations for EU member states.

```{r}
# Yearly mean and standard deviation of each similarity measure across
# observations flagged as EU members. Missing values are removed only for the
# Luxembourg measure.
eu_sim <- simil_estimates %>%
  filter(is.eu==TRUE) %>%
  group_by(Year) %>%
  summarise(
    simil_pres = mean(PRES),
    sd_simil_pres = sd(PRES),
    simil_pres_ec = mean(PRES_ec),
    sd_simil_pres_ec = sd(PRES_ec),
    simil_lux = mean(LUX, na.rm = TRUE),
    sd_simil_lux = sd(LUX, na.rm = TRUE)
  )

```


Plotting averages and standard deviations of similarities




```{r}
# Average EU similarities per year for the three reference speeches:
# Luxembourg (blue), presidency (green) and presidency/EC (red), each drawn
# as yearly points plus a smoothed trend line.
# NOTE(review): the "Similarity with presidency" label is red, but the plot
# has both a green (simil_pres) and a red (simil_pres_ec) series, and the
# green one has no label -- confirm which series the label should describe.
ggplot(eu_sim, aes(x = Year)) +
  geom_point(aes(y = simil_lux), na.rm = TRUE, colour = "blue") +
  geom_smooth(aes(y = simil_lux), na.rm = TRUE, se = FALSE, colour = "blue") +
  geom_point(aes(y = simil_pres), colour = "green") +
  geom_smooth(aes(y = simil_pres), se = FALSE, colour = "green") +
  geom_point(aes(y = simil_pres_ec), colour = "red") +
  geom_smooth(aes(y = simil_pres_ec), se = FALSE, colour = "red") +
  labs(y = "Average EU similarities") +
  annotate("text", x = 1983, y = 0.12,
           label = "Similarity with Luxembourg", colour = "blue") +
  annotate("text", x = 2000, y = 0.22,
           label = "Similarity with presidency", colour = "red") +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("avsimil.pdf")

```




```{r}
# Standard deviations of EU similarities per year for the presidency (green)
# and presidency/EC (red) measures. The Luxembourg layers are switched off
# here and kept as comments.
ggplot(eu_sim, aes(x = Year)) +
#  geom_point(aes(y = sd_simil_lux), na.rm = TRUE, colour = "blue") +
#  geom_smooth(aes(y = sd_simil_lux), na.rm = TRUE, colour = "blue", se = FALSE) +
  geom_point(aes(y = sd_simil_pres), colour = "green") +
  geom_smooth(aes(y = sd_simil_pres), se = FALSE, colour = "green") +
  geom_point(aes(y = sd_simil_pres_ec), colour = "red") +
  geom_smooth(aes(y = sd_simil_pres_ec), se = FALSE, colour = "red") +
  labs(y = "SDs of EU similarities") +
#  annotate("text", x = 2000, y = 0.03, label = "Similarity with Luxembourg", colour = "blue") +
  annotate("text", x = 1990, y = 0.075,
           label = "Similarity with presidency", colour = "red") +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("sdsimil.pdf")


```

Similarity with Luxembourg

```{r}
# Standard deviation of EU similarities to Luxembourg per year. The
# presidency layers are switched off here and kept as comments.
ggplot(eu_sim, aes(x = Year)) +
  geom_point(aes(y = sd_simil_lux), na.rm = TRUE, colour = "blue") +
  geom_smooth(aes(y = sd_simil_lux), na.rm = TRUE, se = FALSE, colour = "blue") +
#  geom_point(aes(y = sd_simil_pres), colour = "green") +
#  geom_smooth(aes(y = sd_simil_pres), colour = "green", se = FALSE) +
#  geom_point(aes(y = sd_simil_pres_ec), colour = "red") +
#  geom_smooth(aes(y = sd_simil_pres_ec), colour = "red", se = FALSE) +
  labs(y = "SDs of EU similarities") +
  annotate("text", x = 2000, y = 0.03,
           label = "Similarity with Luxembourg", colour = "blue") +
#  annotate("text", x = 1990, y = 0.075, label = "Similarity with presidency", colour = "red") +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("sdsimillux.pdf")


```


Combining files for output:

```{r}

# Year-level output: EU similarity summaries joined with the EU wordscore
# summaries.
eu_estimates <- left_join(eu_sim, eu_scores, by = "Year")
readr::write_csv(eu_estimates, "eu_estimates.csv")

# Give the fourth column of each wordscore data frame a dimension-specific
# name so the columns stay distinguishable after joining.
# NOTE(review): assumes column 4 holds the wordscore in every frame -- confirm.
names(presus)[4]     <- "PRESUS_wscore"
names(presusalt)[4]  <- "PRESUS_alt_wscore"
names(presrus)[4]    <- "PRESRUS_wscore"
names(presrusalt)[4] <- "PRESRUS_alt_wscore"
names(derus)[4]      <- "DERUS_wscore"
names(deusa)[4]      <- "DEUSA_wscore"
names(luxus)[4]      <- "LUXUS_wscore"
names(luxerus)[4]    <- "LUXERUS_wscore"
names(pal)[4]        <- "PAL_wscore"
names(rusa)[4]       <- "RUSA_wscore"
names(chnusa)[4]     <- "CHNUSA_wscore"

# Full-join all wordscore frames on country and year, left to right in the
# order listed.
scores <- Reduce(
  function(joined, nxt) full_join(joined, nxt, by=c("Country", "Year")),
  list(presus, presusalt, presrus, presrusalt, derus, deusa,
       luxus, luxerus, pal, rusa, chnusa)
)

# Attach the similarity estimates, sort by country then year, and write the
# selected columns.
simil_scores <- full_join(scores, simil_estimates, by=c("Country", "Year"))

row_order <- order(simil_scores$Country, simil_scores$Year)
estimates_out <- select(simil_scores[row_order,],
                        c(Country, Year, PRES, PRES_ec, LUX, RUSA_wscore,
                          CHNUSA_wscore, PRESUS_wscore, PRESUS_alt_wscore,
                          PRESRUS_wscore, PRESRUS_alt_wscore, DERUS_wscore,
                          DEUSA_wscore, LUXUS_wscore, LUXERUS_wscore,
                          PAL_wscore))

readr::write_csv(estimates_out, "estimates.csv")

```


## Splines analysis

```{r}
library(splines)

# Natural cubic spline regression of the EU standard deviation on the
# USA-Russia dimension over time, with interior knots at 1986, 1993, 1999
# and 2009.
spl1 <- lm(sd_rusa ~ ns(Year, knots = c(1986, 1993, 1999, 2009)),
           data = eu_scores)

# Predict for every row of the plotted data, so the fitted values always line
# up with eu_scores even if lm() dropped rows with a missing response.
# `se.fit` is spelled out rather than relying on partial matching of `se`.
pred1 <- predict(spl1, newdata = eu_scores, se.fit = TRUE)

# Observed values, fitted curve, and a band of +/- 2 standard errors.
ggplot(eu_scores, aes(Year, sd_rusa)) +
  geom_ribbon(aes(ymin = pred1$fit - 2 * pred1$se.fit,
                  ymax = pred1$fit + 2 * pred1$se.fit),
              fill = "grey70", alpha = .5) +
  geom_point() +
  geom_line(aes(y = pred1$fit)) +
  theme_bw() +
  ylab("Standard deviation of wordscores for EU") +
  ggtitle("Splines for USA-Russia dimension")

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("sdRUSAspline.pdf")

```





```{r}
# Natural cubic spline regression of the EU standard deviation on the
# USA-China dimension over time, with interior knots at 1986, 1993, 1999
# and 2009.
spl2 <- lm(sd_chn ~ ns(Year, knots = c(1986, 1993, 1999, 2009)),
           data = eu_scores)

# Predict for every row of the plotted data, so the fitted values always line
# up with eu_scores even if lm() dropped rows with a missing response.
# `se.fit` is spelled out rather than relying on partial matching of `se`.
pred2 <- predict(spl2, newdata = eu_scores, se.fit = TRUE)

# Observed values, fitted curve, and a band of +/- 2 standard errors.
ggplot(eu_scores, aes(Year, sd_chn)) +
  geom_ribbon(aes(ymin = pred2$fit - 2 * pred2$se.fit,
                  ymax = pred2$fit + 2 * pred2$se.fit),
              fill = "grey70", alpha = .5) +
  geom_point() +
  geom_line(aes(y = pred2$fit)) +
  theme_bw() +
  ylab("Standard deviation of wordscores for EU") +
  ggtitle("Splines for USA-China dimension")

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("sdCHNspline.pdf")
```



















```{r}
# Natural cubic spline regression of the standard deviation of EU similarity
# to the presidency over time, with interior knots at 1986, 1993, 1999 and
# 2009.
spl3 <- lm(sd_simil_pres ~ ns(Year, knots = c(1986, 1993, 1999, 2009)),
           data = eu_sim)

# Predict for every row of the plotted data, so the fitted values always line
# up with eu_sim even if lm() dropped rows with a missing response.
# `se.fit` is spelled out rather than relying on partial matching of `se`.
pred3 <- predict(spl3, newdata = eu_sim, se.fit = TRUE)

# Observed values, fitted curve, and a band of +/- 2 standard errors.
ggplot(eu_sim, aes(Year, sd_simil_pres)) +
  geom_ribbon(aes(ymin = pred3$fit - 2 * pred3$se.fit,
                  ymax = pred3$fit + 2 * pred3$se.fit),
              fill = "grey70", alpha = .5) +
  geom_point() +
  geom_line(aes(y = pred3$fit)) +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("sdsimilspline.pdf")

```







```{r}
# Natural cubic spline regression of the standard deviation of EU similarity
# to the presidency/EC over time, with interior knots at 1986, 1993, 1999
# and 2009.
spl4 <- lm(sd_simil_pres_ec ~ ns(Year, knots = c(1986, 1993, 1999, 2009)),
           data = eu_sim)

# Predict for every row of the plotted data, so the fitted values always line
# up with eu_sim even if lm() dropped rows with a missing response.
# `se.fit` is spelled out rather than relying on partial matching of `se`.
pred4 <- predict(spl4, newdata = eu_sim, se.fit = TRUE)

# Observed values, fitted curve, and a band of +/- 2 standard errors.
ggplot(eu_sim, aes(Year, sd_simil_pres_ec)) +
  geom_ribbon(aes(ymin = pred4$fit - 2 * pred4$se.fit,
                  ymax = pred4$fit + 2 * pred4$se.fit),
              fill = "grey70", alpha = .5) +
  geom_point() +
  geom_line(aes(y = pred4$fit)) +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("sdsimilecspline.pdf")

```





```{r}
library(tidyr)

# Years without a Luxembourg standard deviation are dropped before fitting,
# so the model and the plot use the same rows.
lux_sim <- eu_sim %>% drop_na(sd_simil_lux)

# Natural cubic spline regression of the standard deviation of EU similarity
# to Luxembourg over time, with interior knots at 1986, 1993, 1999 and 2009.
spl5 <- lm(sd_simil_lux ~ ns(Year, knots = c(1986, 1993, 1999, 2009)),
           data = lux_sim)

# Predict explicitly for the plotted rows. `se.fit` is spelled out rather
# than relying on partial matching of `se`.
pred5 <- predict(spl5, newdata = lux_sim, se.fit = TRUE)

# Observed values, fitted curve, and a band of +/- 2 standard errors.
ggplot(lux_sim, aes(Year, sd_simil_lux)) +
  geom_ribbon(aes(ymin = pred5$fit - 2 * pred5$se.fit,
                  ymax = pred5$fit + 2 * pred5$se.fit),
              fill = "grey70", alpha = .5) +
  geom_point() +
  geom_line(aes(y = pred5$fit)) +
  theme_bw()

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("sdsimilluxspline.pdf")

```






The coefficients here are essentially slopes of lines for each segment. 

```{r}
# Plain-text regression table for the four spline models. The intercept
# (coefficient 1) is moved to the last row.
# NOTE(review): the period labels are attached to natural-spline basis
# coefficients; confirm that reading them as per-segment effects is intended.
spline_models <- list(spl1, spl2, spl3, spl4)
coef_labels <- c("Intercept", "1970-1986", "1987-1993", "1994-1999",
                 "2000-2009", "2010-present day")
model_labels <- c("SD EU on RUSA", "SD EU on CHNUSA", "SD Simil Pres",
                  "SD Simil Pres (EC) ")

screenreg(spline_models,
          digits = 3,
          bold = 0.05,
          stars = c(0.001, 0.01, 0.05),
          reorder.coef = c(2:6, 1),
          custom.coef.names = coef_labels,
          custom.model.names = model_labels)

```



```{r}
# LaTeX regression table for the four spline models. The intercept
# (coefficient 1) is moved to the last row.
spline_models <- list(spl1, spl2, spl3, spl4)
coef_labels <- c("Intercept", "1970-1986", "1987-1993", "1994-1999",
                 "2000-2009", "2010-present day")
model_labels <- c("SD EU on RUSA", "SD EU on CHNUSA", "SD Simil Pres",
                  "SD Simil Pres (EC) ")

texreg(spline_models,
       digits = 3,
       bold = 0.05,
       stars = c(0.001, 0.01, 0.05),
       reorder.coef = c(2:6, 1),
       custom.coef.names = coef_labels,
       custom.model.names = model_labels)

```





```{r}
# LaTeX regression table for the Luxembourg-similarity spline model. The
# intercept (coefficient 1) is moved to the last row.
coef_labels <- c("Intercept", "1970-1986", "1987-1993", "1994-1999",
                 "2000-2009", "2010-present day")

texreg(spl5,
       digits = 3,
       bold = 0.05,
       stars = c(0.001, 0.01, 0.05),
       reorder.coef = c(2:6, 1),
       custom.coef.names = coef_labels,
       custom.model.names = "SD Simil Lux")

```

# References